In [1]:
# !pip install -q ultralytics
# !pip install -q yolov5

In [2]:
import os
import cv2
from ultralytics import YOLO

# # Load the pre-trained YOLO model
# model = YOLO('/content/best.pt')

View settings with 'yolo settings' or at '/home/himalay/.config/Ultralytics/settings.yaml'
Update settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [8]:
import cv2
from ultralytics import YOLO, solutions

def count_potholes_in_region(video_path, output_video_path, model_path):
    """Track, count, and display the size of potholes in a specific region within a video."""
    model = YOLO(model_path)
    cap = cv2.VideoCapture(video_path)
    assert cap.isOpened(), "Error reading video file"
    w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
    region_points = [(20, 600), (1240, 604), (1240, 560), (20, 560)]
    video_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
    counter = solutions.ObjectCounter(
        view_img=True, reg_pts=region_points, names=model.names, draw_tracks=True, line_thickness=2, view_in_counts=False, view_out_counts=False, region_thickness = 0
    )

    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        # Track potholes in the video
        results = model.track(frame, persist=True, show=False)

        # Initialize pothole count
        pothole_count = 0

        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                pothole_count += 1

                # Calculate the size (area) of the pothole
                width = x2 - x1
                height = y2 - y1
                area = width * height

                # Draw the bounding box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Display the size of the pothole at the bottom of the rectangle
                cv2.putText(frame, f"Size: {width} x {height}", (x1, y2 + 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # # Draw the total pothole count on the frame
        # cv2.putText(frame, f"Pothole Count: {pothole_count}", (50, 50),
        #             cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Count and display only potholes
        frame = counter.start_counting(frame, results)

        # Write the frame with the pothole count and sizes to the output video
        video_writer.write(frame)

    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()

# Example usage
count_potholes_in_region("sample_video.mp4", "output_video.mp4", "potholes_last.pt")


Polygon Counter Initiated.

0: 384x640 2 Potholes, 55.6ms
Speed: 2.0ms preprocess, 55.6ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 56.0ms
Speed: 1.3ms preprocess, 56.0ms inference, 2.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 57.9ms
Speed: 1.2ms preprocess, 57.9ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Potholes, 56.3ms
Speed: 1.9ms preprocess, 56.3ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 54.2ms
Speed: 1.8ms preprocess, 54.2ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 51.3ms
Speed: 1.2ms preprocess, 51.3ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 53.7ms
Speed: 1.8ms preprocess, 53.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 56.1ms
Speed: 6.0ms preprocess, 56.1ms inference, 2.2

In [9]:
from collections import defaultdict

import cv2

from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

track_history = defaultdict(lambda: [])

model = YOLO("potholes_last.pt")  # segmentation model
cap = cv2.VideoCapture("sample_video.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

out = cv2.VideoWriter("output_video.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))

while True:
    ret, im0 = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    annotator = Annotator(im0, line_width=2)

    results = model.track(im0, persist=True)

    if results[0].boxes.id is not None and results[0].masks is not None:
        masks = results[0].masks.xy
        track_ids = results[0].boxes.id.int().cpu().tolist()

        for mask, track_id in zip(masks, track_ids):
            color = colors(int(track_id), True)
            txt_color = annotator.get_txt_color(color)
            annotator.seg_bbox(mask=mask, mask_color=color, label=str(track_id), txt_color=txt_color)

    out.write(im0)
    cv2.imshow("instance-segmentation-object-tracking", im0)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

out.release()
cap.release()
cv2.destroyAllWindows()


0: 384x640 2 Potholes, 110.3ms
Speed: 14.7ms preprocess, 110.3ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 73.6ms
Speed: 2.0ms preprocess, 73.6ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 57.0ms
Speed: 1.7ms preprocess, 57.0ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Potholes, 64.1ms
Speed: 1.5ms preprocess, 64.1ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 61.8ms
Speed: 2.4ms preprocess, 61.8ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 52.9ms
Speed: 1.2ms preprocess, 52.9ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 54.7ms
Speed: 1.4ms preprocess, 54.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 53.0ms
Speed: 1.1ms preprocess, 53.0ms inference, 2.0ms postprocess per image

In [16]:
from collections import defaultdict

import cv2

from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

track_history = defaultdict(lambda: [])

model = YOLO("potholes_last.pt")  # segmentation model
cap = cv2.VideoCapture("sample_video.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

out = cv2.VideoWriter("output_video4.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

while True:
    ret, im0 = cap.read()
    if not ret:
        break

    annotator = Annotator(im0, line_width=2)
    results = model.track(im0, persist=True)

    if results[0].boxes.id is not None and results[0].masks is not None:
        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])

                # Calculate the size (area) of the pothole
                width = x2 - x1
                height = y2 - y1
                area = width * height

                # Draw the bounding box
                cv2.rectangle(im0, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Display the size of the pothole at the bottom of the rectangle
                cv2.putText(im0, f"Size: {width} x {height}", (x1, y2 + 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
                
        masks = results[0].masks.xy
        track_ids = results[0].boxes.id.int().cpu().tolist()

        for mask, track_id in zip(masks, track_ids):
            annotator.seg_bbox(mask=mask, mask_color=colors(track_id, True), label=str(track_id))

    out.write(im0)
    cv2.imshow("instance-segmentation-object-tracking", im0)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

out.release()
cap.release()
cv2.destroyAllWindows()


0: 384x640 2 Potholes, 52.1ms
Speed: 2.0ms preprocess, 52.1ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 54.7ms
Speed: 2.0ms preprocess, 54.7ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 51.8ms
Speed: 2.1ms preprocess, 51.8ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Potholes, 52.3ms
Speed: 1.8ms preprocess, 52.3ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 48.3ms
Speed: 1.5ms preprocess, 48.3ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 52.2ms
Speed: 1.3ms preprocess, 52.2ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 55.3ms
Speed: 1.2ms preprocess, 55.3ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 58.7ms
Speed: 1.2ms preprocess, 58.7ms inference, 2.1ms postprocess per image at

In [20]:
from collections import defaultdict
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
import numpy as np

track_history = defaultdict(lambda: [])

model = YOLO("potholes_last.pt")  # segmentation model
cap = cv2.VideoCapture("sample_video.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

out = cv2.VideoWriter("output_video60.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

while True:
    ret, im0 = cap.read()
    if not ret:
        break

    annotator = Annotator(im0, line_width=2)
    results = model.track(im0, persist=True)

    if results[0].boxes.id is not None and results[0].masks is not None:
        masks = results[0].masks.xy  # Extract the mask coordinates
        track_ids = results[0].boxes.id.int().cpu().tolist()  # Get track IDs

        for mask, track_id in zip(masks, track_ids):
            # Convert the mask coordinates to a contour (list of points)
            contour = np.array(mask, dtype=np.int32)

            # Calculate the surface area of the mask
            area = cv2.contourArea(contour)

            # Draw the bounding box and mask
            annotator.seg_bbox(mask=mask, mask_color=colors(track_id, True), label=f'ID: {track_id}, Area: {int(area)}')

            # Optionally, you can also draw the contour on the image for visualization
            cv2.drawContours(im0, [contour], -1, (255, 0, 0), 2)  # Blue contour for visualization

    out.write(im0)
    cv2.imshow("instance-segmentation-object-tracking", im0)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

out.release()
cap.release()
cv2.destroyAllWindows()



0: 384x640 2 Potholes, 73.1ms
Speed: 3.4ms preprocess, 73.1ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 74.3ms
Speed: 2.1ms preprocess, 74.3ms inference, 3.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 77.8ms
Speed: 2.4ms preprocess, 77.8ms inference, 4.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Potholes, 65.2ms
Speed: 1.6ms preprocess, 65.2ms inference, 3.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 66.0ms
Speed: 2.3ms preprocess, 66.0ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 74.6ms
Speed: 1.6ms preprocess, 74.6ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 62.6ms
Speed: 1.8ms preprocess, 62.6ms inference, 2.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 64.6ms
Speed: 1.5ms preprocess, 64.6ms inference, 3.0ms postprocess per image at

In [None]:
import cv2
from ultralytics import YOLO, solutions

def count_potholes_in_region(video_path, output_video_path, model_path):
    """Track and count potholes in a specific region within a video."""
    model = YOLO(model_path)
    cap = cv2.VideoCapture(video_path)
    assert cap.isOpened(), "Error reading video file"
    w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
    region_points = [(20, 600), (1240, 604), (1240, 560), (20, 560)]
    video_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
    counter = solutions.ObjectCounter(
        view_img=True, reg_pts=region_points, names=model.names, draw_tracks=True, line_thickness=2, view_in_counts = False, view_out_counts = False
    )

    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        # Track potholes in the video
        results = model.track(frame, persist=True, show=False)

        # Count the number of potholes
        pothole_count = len(results)

        # Draw the pothole count on the frame
        cv2.putText(frame, f"Pothole Count: {pothole_count}", (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Count and display only potholes
        frame = counter.start_counting(frame, results)

        # Write the frame with the pothole count to the output video
        video_writer.write(frame)

    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()

count_potholes_in_region("/content/sample_video.mp4", "hhhoutput_video.mp4", "/content/potholes_last.pt")



Polygon Counter Initiated.

0: 384x640 2 Potholes, 11.5ms
Speed: 1.9ms preprocess, 11.5ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 10.7ms
Speed: 2.1ms preprocess, 10.7ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 10.0ms
Speed: 2.3ms preprocess, 10.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Potholes, 8.6ms
Speed: 2.4ms preprocess, 8.6ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 10.0ms
Speed: 2.4ms preprocess, 10.0ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 9.1ms
Speed: 2.0ms preprocess, 9.1ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 8.3ms
Speed: 2.4ms preprocess, 8.3ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 9.9ms
Speed: 2.0ms preprocess, 9.9ms inference, 1.8ms post

In [None]:


# Path to the input image
input_image_path = '/content/1000_F_145269819_TvcZSVnuXMYtHh5likXZZR466peqJJPd.jpg'

# Path to save the output image
output_image_path = 'pothole_image_with_bboxes.jpg'

# Read the input image
image = cv2.imread(input_image_path)

# Predict using the YOLO model
results = model(image)

# Draw bounding boxes and labels on the image
annotated_image = results[0].plot()  # This method draws bounding boxes on the image

# Save the output image with bounding boxes
cv2.imwrite(output_image_path, annotated_image)

print(f"Output image saved to {output_image_path}")



0: 448x640 8 Potholes, 192.5ms
Speed: 5.0ms preprocess, 192.5ms inference, 20.6ms postprocess per image at shape (1, 3, 448, 640)
Output image saved to pothole_image_with_bboxes.jpg


In [None]:
!pip install opencv-contrib-python




In [14]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load YOLO model
model = YOLO('potholes_last.pt')  # Updated model path

# Path to the input video and output video
input_video_path = 'sample_video.mp4'
output_video_path = 'output_video3.mp4'

# Open the input video
cap = cv2.VideoCapture(input_video_path)

# Get the video's width, height, and frames per second (fps)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Define the codec and create a VideoWriter object to save the output video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

# Initialize a dictionary to store tracking information
tracking_info = {}

# Process each frame in the video
frame_id = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Perform pothole detection using the YOLO model
    results = model(frame)

    for result in results:
        boxes = result.boxes
        for i, box in enumerate(boxes):
            # Extract bounding box coordinates
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()

            # Convert coordinates to integers
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

            # Calculate the size (area) of the pothole
            width = x2 - x1
            height = y2 - y1
        
            # Generate a unique label for each pothole
            label = f"Pothole {i+1}"

            # Track each pothole across frames (you can improve this with more advanced tracking methods)
            if i not in tracking_info:
                tracking_info[i] = {
                    'id': i+1,
                    'x': x1,
                    'y': y1,
                    'w': x2-x1,
                    'h': y2-y1
                }

            # Display the size of the pothole at the bottom of the rectangle
            cv2.putText(frame, f"Size: {width} x {height}", (x1, y2 + 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)


            # Draw bounding box and label on the frame
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Write the processed frame to the output video
    out.write(frame)
    frame_id += 1

# Release video resources
cap.release()
out.release()

print("Pothole detection and labeling completed. Output video saved as", output_video_path)



0: 384x640 2 Potholes, 53.3ms
Speed: 1.1ms preprocess, 53.3ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Potholes, 50.8ms
Speed: 1.1ms preprocess, 50.8ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Potholes, 50.3ms
Speed: 1.3ms preprocess, 50.3ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Potholes, 51.3ms
Speed: 1.8ms preprocess, 51.3ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Potholes, 51.5ms
Speed: 1.6ms preprocess, 51.5ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Potholes, 50.7ms
Speed: 1.4ms preprocess, 50.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Potholes, 55.5ms
Speed: 1.3ms preprocess, 55.5ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Potholes, 56.1ms
Speed: 1.5ms preprocess, 56.1ms inference, 2.0ms postprocess per image at