In [1]:
import os
import cv2
import torch
import shutil
import trainer
import ultralytics
from ultralytics import YOLO

In [7]:
# Fetch the pothole demo video (sections.mov) from Kaggle into data_path.
import kagglehub

data_path  = "./data/potholes_video"
sections_video_path = data_path + "/sections.mov"

# Create a directory to store the dataset.
os.makedirs(data_path, exist_ok=True)

# Only download when the video is missing, so re-running this cell does not
# fetch and unpack the whole dataset again.
if not os.path.exists(sections_video_path):
    # Download the latest version of the dataset; the call returns a local path.
    kaggle_path = kagglehub.dataset_download("gracehephzibahm/pothole-severity-classification")

    # shutil.move places the downloaded folder inside data_path under its own
    # name and returns that new location. Using the returned path avoids
    # hard-coding the folder name (previously "1", presumably the dataset
    # version -- TODO confirm), which would break once that name changes.
    dataset_dir = shutil.move(kaggle_path, data_path)

    # Move the video file to the data folder.
    shutil.move(
        os.path.join(
            dataset_dir,
            "Challenge Track 2_ Pothole severity classification via computer vision",
            "sections.mov",
        ),
        sections_video_path,
    )

    # Remove what is left of the downloaded dataset folder.
    shutil.rmtree(dataset_dir)


In [50]:
def _resize_video(input_video_path, output_video_path, open_error, target_width=None, target_height=None):
    """Re-encode a video at a new frame size while keeping its aspect ratio.

    Shared implementation behind ``resize_video_by_width`` and
    ``resize_video_by_height``. Exactly one of ``target_width`` and
    ``target_height`` is expected; the other dimension is derived from the
    source frame size.

    Args:
        input_video_path: Path of the video to read.
        output_video_path: Path of the ``mp4v``-encoded video to write.
        open_error: Message printed when the input video cannot be opened.
        target_width: Output width in pixels, or ``None`` to derive it.
        target_height: Output height in pixels, or ``None`` to derive it.

    Returns:
        ``True`` when the readable frames were resized and written, ``False``
        when the input or the output could not be used (a message is printed).
    """
    cap = cv2.VideoCapture(input_video_path)
    out = None
    try:
        if not cap.isOpened():
            print(open_error)
            return False

        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would
        # make the resized video play slower than the source.
        fps = cap.get(cv2.CAP_PROP_FPS)
        original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if original_width <= 0 or original_height <= 0:
            # Guards the divisions below against a source with no usable size.
            print("Error: Input video reports an invalid frame size.")
            return False

        # Derive the missing dimension from the source aspect ratio.
        if target_height is None:
            scale_factor = target_width / original_width
            target_height = max(1, int(original_height * scale_factor))
        else:
            scale_factor = target_height / original_height
            target_width = max(1, int(original_width * scale_factor))
        frame_size = (target_width, target_height)

        # Define the codec and create the VideoWriter object.
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)
        if not out.isOpened():
            print(f"Error: Cannot create output video: {output_video_path}")
            return False

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            out.write(cv2.resize(frame, frame_size))
        return True
    finally:
        # Release the capture and the writer even if reading or writing raised.
        cap.release()
        if out is not None:
            out.release()


def resize_video_by_width(input_video_path, output_video_path, target_width=448):
    """Resize a video to ``target_width`` pixels wide, keeping the aspect ratio.

    Args:
        input_video_path: Path of the video to read.
        output_video_path: Path of the ``mp4v``-encoded video to write.
        target_width: Width of the output frames in pixels.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    if _resize_video(input_video_path, output_video_path,
                     "Error opening video stream or file",
                     target_width=target_width):
        print("Resized video saved at", output_video_path)


def resize_video_by_height(input_video_path, output_video_path, target_height=448):
    """Resize a video to ``target_height`` pixels high, keeping the aspect ratio.

    Args:
        input_video_path: Path of the video to read.
        output_video_path: Path of the ``mp4v``-encoded video to write.
        target_height: Height of the output frames in pixels.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    if _resize_video(input_video_path, output_video_path,
                     "Error: Cannot open video.",
                     target_height=target_height):
        print(f"Resized video saved as: {output_video_path}")


In [44]:
# Severity palette in RGB channel order (the order matplotlib uses).
colors_rgb = dict(
    minor_pothole=(0, 128, 1),     # #008001 GREEN
    medium_pothole=(255, 166, 0),  # #ffa600 ORANGE
    major_pothole=(229, 0, 0),     # #e50000 RED
)

# The same palette in BGR channel order (the order OpenCV drawing calls use),
# obtained by reversing each RGB triple so the two tables cannot drift apart.
colors_bgr = {severity: rgb[::-1] for severity, rgb in colors_rgb.items()}


def _load_detector(model_name, model_path, without_severity_levels):
    """Build the requested detector and return it as ``(model, device)``.

    ``device`` is ``None`` for YOLO and the torch device the weights were
    moved to for Faster R-CNN.

    Raises:
        ValueError: If ``model_name`` is neither ``'YOLO'`` nor ``'FASTERRCNN'``.
    """
    if model_name == 'YOLO':
        return YOLO(model_path), None

    if model_name == 'FASTERRCNN':
        model = trainer.get_model(
            model_name="fasterrcnn_resnet50_fpn",
            with_severity_levels=not without_severity_levels,
        )
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # map_location lets a checkpoint that was saved on a GPU be loaded on
        # a CPU-only machine.
        state_dict = torch.load(model_path, map_location=device, weights_only=True)
        model.load_state_dict(state_dict)
        model.to(device)  # Move the model to the selected device
        model.eval()  # Set the model to evaluation mode
        return model, device

    raise ValueError(f"Model name '{model_name}' is not supported. Please use 'YOLO' or 'FASTERRCNN'.")


def _draw_labelled_box(frame, x1, y1, x2, y2, label_text, color):
    """Draw a bounding box with a filled caption on ``frame`` (in place)."""
    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

    (text_width, text_height), baseline = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    # Push the caption down when the box touches the top edge, so the caption
    # background and text are not drawn above the frame.
    label_bottom = max(y1, text_height + baseline)
    cv2.rectangle(frame, (x1, label_bottom - text_height - baseline),
                  (x1 + text_width, label_bottom), color, cv2.FILLED)
    cv2.putText(frame, label_text, (x1, label_bottom - baseline),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)


def _frcnn_label_style(label, without_severity_levels):
    """Return ``(color, caption)`` for a Faster R-CNN class label."""
    if without_severity_levels:
        # Single-class mode: every detection is captioned as a pothole, so a
        # label other than 1 can no longer leave the color/caption undefined.
        return colors_bgr['major_pothole'], 'Pothole'
    if label == 1:  # Minor pothole
        return colors_bgr['minor_pothole'], 'Minor'
    if label == 2:  # Medium pothole
        return colors_bgr['medium_pothole'], 'Medium'
    return colors_bgr['major_pothole'], 'Major'  # Major pothole


def _annotate_with_yolo(model, input_video_path, out, conf_threshold):
    """Stream YOLO predictions for the video and write annotated frames to ``out``."""
    for result in model.predict(source=input_video_path, stream=True, conf=conf_threshold, verbose=False):
        frame = result.orig_img.copy()  # Start with the original frame
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            class_name = model.names[int(box.cls[0])]
            confidence = float(box.conf[0])
            color = colors_bgr.get(class_name, (255, 255, 255))  # Default to white if not found
            _draw_labelled_box(frame, x1, y1, x2, y2, f"{class_name} {confidence:.2f}", color)
        out.write(frame)  # Save frame to output video


def _annotate_with_fasterrcnn(model, device, cap, out, conf_threshold, without_severity_levels):
    """Run Faster R-CNN on every frame read from ``cap`` and write annotated frames to ``out``."""
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV frames are BGR; convert to RGB, scale to [0, 1], reorder to
        # channels-first and add a batch dimension before calling the model.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image_tensor = torch.from_numpy(image).permute(2, 0, 1).float() / 255.0
        image_tensor = image_tensor.unsqueeze(0).to(device)

        with torch.no_grad():
            prediction = model(image_tensor)[0]

        boxes = prediction['boxes'].cpu().numpy()
        labels = prediction['labels'].cpu().numpy()
        scores = prediction['scores'].cpu().numpy()

        # Keep only the detections above the confidence threshold.
        keep = scores > conf_threshold
        for box, label, score in zip(boxes[keep], labels[keep], scores[keep]):
            x1, y1, x2, y2 = map(int, box)
            color, severity = _frcnn_label_style(int(label), without_severity_levels)
            _draw_labelled_box(frame, x1, y1, x2, y2, f'{severity}: {score:.2f}', color)

        # Write frame to output video
        out.write(frame)


def video_inference(input_video_path, output_video_path, model_name, model_path,
                    without_severity_levels=False, conf_threshold=None):
    """Run a pothole detector over a video and save a copy with drawn detections.

    Args:
        input_video_path: Path of the video to analyse.
        output_video_path: Path of the annotated ``mp4v``-encoded video to write.
        model_name: ``'YOLO'`` or ``'FASTERRCNN'``.
        model_path: Weights passed to ``YOLO(...)``, or the state dict loaded
            into the Faster R-CNN model.
        without_severity_levels: Faster R-CNN only. When true, the model is
            built without severity levels and every detection is captioned
            ``Pothole``; otherwise labels 1 and 2 are captioned ``Minor`` and
            ``Medium`` and any other label ``Major``.
        conf_threshold: Minimum confidence for a detection to be drawn.
            ``None`` keeps the per-model defaults (0.25 for YOLO, 0.5 for
            Faster R-CNN).

    Returns:
        None. Progress and errors are reported with ``print``.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    without_severity_levels = bool(without_severity_levels)
    model, device = _load_detector(model_name, model_path, without_severity_levels)

    # Open video
    cap = cv2.VideoCapture(input_video_path)
    out = None
    try:
        if not cap.isOpened():
            print("Error: Cannot open video.")
            return

        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would
        # make the annotated video play slower than the source.
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Video writer
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
        if not out.isOpened():
            print(f"Error: Cannot create output video: {output_video_path}")
            return

        if model_name == 'YOLO':
            # The prediction stream reads the file itself; `cap` only supplied
            # the frame rate and frame size for the writer.
            threshold = 0.25 if conf_threshold is None else conf_threshold
            _annotate_with_yolo(model, input_video_path, out, threshold)
        else:
            threshold = 0.5 if conf_threshold is None else conf_threshold
            _annotate_with_fasterrcnn(model, device, cap, out, threshold, without_severity_levels)
    finally:
        # Release the capture and the writer even if inference raised.
        cap.release()
        if out is not None:
            out.release()

    print(f"Processed video saved as: {output_video_path}")

In [None]:
# Run video inference with YOLO on one clip from data_path.
# Other input/output pairs that can be swapped in below:
#   data_path + "/sections.mov"               -> data_path + "/sections_yolo.mp4"
#   data_path + "/potholes_at_night_vid.mp4"  -> data_path + "/potholes_at_night_vid_yolo.mp4"
input_video_path = data_path + "/pothole_70mph_vid.mp4"
output_video_path = data_path + "/pothole_70mph_vid_yolo.mp4"

video_inference(
    input_video_path,
    output_video_path,
    model_name='YOLO',
    model_path='data/models/yolov8m/runs/yolov8m_severity_train_aug/weights/best.pt',
)

Processed video saved as: ./data/potholes_video/pothole_70mph_vid_yolo.mp4


In [53]:
# Write a 448-pixel-wide copy of sections.mov; the inference cell that follows
# reads it through `resized_video_path`.
input_video_path = data_path + "/sections.mov"
resized_video_path = data_path + "/sections_resized.mp4"

resize_video_by_width(input_video_path, resized_video_path, target_width=448)
# Height-based alternative:
# resize_video_by_height(input_video_path, resized_video_path, target_height=448)

Resized video saved at ./data/potholes_video/sections_resized.mp4


In [56]:
# Run Faster R-CNN inference (with severity levels) on the resized video.
# Alternatives that can be swapped in:
#   output_video_path = data_path + "/pothole_70mph_vid_fasterrcnn.mp4"
#   output_video_path = data_path + "/potholes_at_night_vid_fasterrcnn.mp4"
#   model_path = 'data/models/fasterrcnn_resnet50_fpn/fasterrcnn_resnet50_fpn_best.pth'
model_path = 'data/models/fasterrcnn_resnet_severity/fasterrcnn_resnet50_fpn_best.pth'
output_video_path = data_path + "/sections_fasterrcnn_severities.mp4"

video_inference(
    resized_video_path,
    output_video_path,
    model_name='FASTERRCNN',
    model_path=model_path,
    without_severity_levels=False,
)

Processed video saved as: ./data/potholes_video/sections_fasterrcnn_severities.mp4
