In [1]:
from ultralytics import YOLO
import cv2

# Images 

# Load pretrained YOLOv8 model
model = YOLO('yolov8n.pt')

results = model('Images/test3.jpg') 
result_image = results[0].plot() 

cv2.imshow("YOLOv8 Predictions", result_image)
cv2.waitKey(0) 
cv2.destroyAllWindows()



image 1/1 c:\Users\ignat\Documents\Python projects\Vedere artificiala\Lab6\Images\test3.jpg: 640x448 8 persons, 3 ties, 191.0ms
Speed: 18.0ms preprocess, 191.0ms inference, 24.0ms postprocess per image at shape (1, 3, 640, 448)


In [None]:
# Videos

video_path = 'Videos/IMG_0650.mov'  
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Error: Could not open the .mov video file.")
    exit()

# output video 
output_path = 'output_video.mov'
fourcc = cv2.VideoWriter_fourcc(*'avc1')  # codec for .mov 
fps = int(cap.get(cv2.CAP_PROP_FPS))      # frames per second
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

# Process the video frame by frame
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame)
    annotated_frame = results[0].plot()
    out.write(annotated_frame)

    # Display the frame 
    cv2.imshow("YOLOv8 Video Detection", annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  
        break

cap.release()
out.release()
cv2.destroyAllWindows()



0: 640x384 8 cars, 1 bus, 77.1ms
Speed: 3.0ms preprocess, 77.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 8 cars, 1 bus, 102.0ms
Speed: 5.0ms preprocess, 102.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 8 cars, 1 bus, 74.0ms
Speed: 3.0ms preprocess, 74.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 9 cars, 59.0ms
Speed: 3.0ms preprocess, 59.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 8 cars, 61.0ms
Speed: 2.0ms preprocess, 61.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 8 cars, 63.0ms
Speed: 3.0ms preprocess, 63.0ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 9 cars, 1 truck, 58.0ms
Speed: 2.0ms preprocess, 58.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 9 cars, 59.0ms
Speed: 3.0ms preprocess, 59.0ms inference, 1.0ms postprocess per image at

In [2]:
# Track vehicles in dataset, default model yolov8n
import cv2
import os
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort


images_folder = "C:\\Users\\ignat\\.cache\\kagglehub\\datasets\\trainingdatapro\\cars-video-object-tracking\\versions\\3\\images"
output_folder = "output_images"
os.makedirs(output_folder, exist_ok=True)

model = YOLO("yolov8n.pt")

tracker = DeepSort(max_age=5)

# Process each image in the folder
for image_name in os.listdir(images_folder):
    image_path = os.path.join(images_folder, image_name)
    image = cv2.imread(image_path)

    if image is None:
        print(f"Could not read image {image_name}. Skipping...")
        continue

    # Run YOLO detection
    results = model(image)

    # Extract detections
    bbs = []  # List of (bbox, confidence, class) tuples
    for result in results[0].boxes:
        class_id = int(result.cls)
        class_name = results[0].names[class_id]
        confidence = float(result.conf)
        x1, y1, x2, y2 = map(int, result.xyxy[0].tolist())

        if class_name == 'car':
            bbs.append(([x1, y1, x2 - x1, y2 - y1], confidence, class_name))

        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 1)  # Blue box
        cv2.putText(image,f"{class_name} ({confidence:.2f})",(x1 - 160, (y1+y2)//2),cv2.FONT_HERSHEY_SIMPLEX,0.9,(255, 0, 0),1)

    # Run Deep SORT tracking
    tracks = tracker.update_tracks(bbs, frame=image)

    # Draw boxes for each tracker
    for track in tracks:
        if not track.is_confirmed():
            continue
        track_id = track.track_id
        ltrb = track.to_ltrb()
        x1, y1, x2, y2 = map(int, ltrb)

        # Draw the tracked car
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 1)
        cv2.putText(image, f'Track ID {track_id}', (x1 + 100, (y1+y2)//2), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 1)

    # Calculate and display percentages
    car_count = len([track for track in tracks if track.det_class == 'car'])
    minivan_count = len([track for track in tracks if track.det_class == 'minivan'])
    truck_count = len([track for track in tracks if track.det_class == 'truck'])
    person_count = len([track for track in tracks if track.det_class == 'person'])

    total_objects = car_count + minivan_count + truck_count + person_count
    if total_objects > 0:
        cv2.putText(image, f'Cars: {car_count} ({car_count/total_objects*100:.1f}%)', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 1)
        cv2.putText(image, f'Minivans: {minivan_count} ({minivan_count/total_objects*100:.1f}%)', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 1)
        cv2.putText(image, f'Trucks: {truck_count} ({truck_count/total_objects*100:.1f}%)', (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 1)
        cv2.putText(image, f'Persons: {person_count} ({person_count/total_objects*100:.1f}%)', (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 1)

    # Save the annotated image
    output_path = os.path.join(output_folder, image_name)
    cv2.imwrite(output_path, image)

    # Display the number of cars
    print(f"Processed {image_name}: Cars: {car_count}, Minivans: {minivan_count}, Trucks: {truck_count}, Persons: {person_count}")




0: 384x640 3 persons, 5 cars, 1 bus, 58.0ms
Speed: 4.0ms preprocess, 58.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Detections for frame_000000.PNG: [([423, 119, 65, 59], 0.7431023120880127, 'car'), ([605, 219, 71, 83], 0.6717180013656616, 'car'), ([339, 415, 139, 130], 0.6230671405792236, 'car'), ([535, 116, 58, 48], 0.6167604327201843, 'car'), ([833, 730, 97, 53], 0.3178289234638214, 'car')]
Trackers after update: [<deep_sort_realtime.deep_sort.track.Track object at 0x000001B39AEEF8B0>, <deep_sort_realtime.deep_sort.track.Track object at 0x000001B39AEEF2E0>, <deep_sort_realtime.deep_sort.track.Track object at 0x000001B39AEEFD90>, <deep_sort_realtime.deep_sort.track.Track object at 0x000001B39AEDECD0>, <deep_sort_realtime.deep_sort.track.Track object at 0x000001B39AEDEDC0>]
Processed frame_000000.PNG: Cars: 5, Minivans: 0, Trucks: 0, Persons: 0

0: 384x640 2 persons, 6 cars, 1 bus, 56.0ms
Speed: 2.0ms preprocess, 56.0ms inference, 1.0ms postprocess per image 

In [8]:
import cv2
import os
from ultralytics import YOLO
from sklearn.metrics import precision_score, recall_score, f1_score


def iou(box1, box2):
    """Calculate Intersection over Union (IoU) for two bounding boxes."""
    x1, y1, x2, y2 = box1
    x1p, y1p, x2p, y2p = box2

    xi1, yi1 = max(x1, x1p), max(y1, y1p)
    xi2, yi2 = min(x2, x2p), min(y2, y2p)
    inter_area = max(0, xi2 - xi1 + 1) * max(0, yi2 - yi1 + 1)

    box1_area = (x2 - x1 + 1) * (y2 - y1 + 1)
    box2_area = (x2p - x1p + 1) * (y2p - y1p + 1)

    union_area = box1_area + box2_area - inter_area
    return inter_area / union_area if union_area > 0 else 0


def evaluate_model(detections, ground_truths):
    """Evaluate precision, recall, and F1 score for a single image."""
    y_true = []
    y_pred = []

    # Match detections to ground truths
    matched_detections = set()
    for gt in ground_truths:
        gt_matched = False
        for i, det in enumerate(detections):
            if i in matched_detections:
                continue
            if iou(gt[:4], det[:4]) >= 0.5:
                y_true.append(1)  # True positive
                y_pred.append(1)
                matched_detections.add(i)
                gt_matched = True
                break
        if not gt_matched:
            y_true.append(1)  # False negative
            y_pred.append(0)

    # Handle unmatched detections (false positives)
    for i, det in enumerate(detections):
        if i not in matched_detections:
            y_true.append(0)
            y_pred.append(1)

    precision = precision_score(y_true, y_pred, zero_division=1)
    recall = recall_score(y_true, y_pred, zero_division=1)
    f1 = f1_score(y_true, y_pred, zero_division=1)

    return precision, recall, f1


images_folder = "Test/images_test"
labels_folder = "Test/labels_test"
output_folder = "output_images_trained_model_testonly"
os.makedirs(output_folder, exist_ok=True)

model = YOLO("yolov8n_trained.pt")

all_y_true = []
all_y_pred = []

# Process each image in the folder
for image_name in os.listdir(images_folder):
    image_path = os.path.join(images_folder, image_name)
    label_path = os.path.join(labels_folder, os.path.splitext(image_name)[0] + '.txt')

    image = cv2.imread(image_path)

    if image is None:
        print(f"Could not read image {image_name}. Skipping...")
        continue

    # Load ground truth from label file
    ground_truths = []
    with open(label_path, 'r') as f:
        for line in f:
            parts = line.strip().split()
            class_id, x_center, y_center, w, h = map(float, parts)
            if int(class_id) == 0:  # Only process 'car' class
                x1 = int((x_center - w / 2) * image.shape[1])
                y1 = int((y_center - h / 2) * image.shape[0])
                x2 = int((x_center + w / 2) * image.shape[1])
                y2 = int((y_center + h / 2) * image.shape[0])
                ground_truths.append([x1, y1, x2, y2])

    # Perform inference
    results = model(image)

    # Extract detections
    detections = []
    for result in results[0].boxes:
        class_id = int(result.cls)
        class_name = results[0].names[class_id]
        confidence = float(result.conf)
        x1, y1, x2, y2 = map(int, result.xyxy[0].tolist())

        if class_name == 'car':
            detections.append([x1, y1, x2, y2, confidence])
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 1)
        cv2.putText(image, f"{class_name} ({confidence:.2f})", (x1 - 160, (y1 + y2) // 2), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 1)

    # Evaluate model
    precision, recall, f1 = evaluate_model(detections, ground_truths)
    print(f"Processed {image_name}: Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}")

    # Update overall metrics
    for gt in ground_truths:
        if any(iou(gt[:4], det[:4]) >= 0.5 for det in detections):
            all_y_true.append(1)
            all_y_pred.append(1)
        else:
            all_y_true.append(1)
            all_y_pred.append(0)

    for det in detections:
        if not any(iou(det[:4], gt[:4]) >= 0.5 for gt in ground_truths):
            all_y_true.append(0)
            all_y_pred.append(1)

    # Save the annotated image
    output_path = os.path.join(output_folder, image_name)
    cv2.imwrite(output_path, image)

# Calculate overall precision, recall, and F1 score
precision = precision_score(all_y_true, all_y_pred, zero_division=1)
recall = recall_score(all_y_true, all_y_pred, zero_division=1)
f1 = f1_score(all_y_true, all_y_pred, zero_division=1)

print(f"Overall Precision: {precision:.2f}")
print(f"Overall Recall: {recall:.2f}")
print(f"Overall F1 Score: {f1:.2f}")



0: 384x640 2 persons, 5 cars, 1 bus, 71.0ms
Speed: 4.0ms preprocess, 71.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Processed frame_000002.PNG: Precision: 0.00, Recall: 0.00, F1 Score: 0.00

0: 384x640 1 person, 7 cars, 1 bus, 1 train, 109.9ms
Speed: 2.1ms preprocess, 109.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Processed frame_000023.PNG: Precision: 0.00, Recall: 0.00, F1 Score: 0.00

0: 384x640 3 persons, 6 cars, 1 train, 1 truck, 57.0ms
Speed: 2.0ms preprocess, 57.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Processed frame_000032.PNG: Precision: 0.00, Recall: 0.00, F1 Score: 0.00

0: 384x640 1 person, 9 cars, 1 bus, 1 train, 81.0ms
Speed: 3.0ms preprocess, 81.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Processed frame_000039.PNG: Precision: 0.00, Recall: 0.00, F1 Score: 0.00

0: 384x640 1 person, 6 cars, 1 train, 1 truck, 76.0ms
Speed: 2.0ms preprocess, 76.0ms inference, 1.0ms pos