In [4]:
import numpy as np
import os
import torch
from torchvision.transforms import Compose, ToTensor, Normalize
from PIL import Image
import matplotlib.pyplot as plt
from ultralytics import YOLO
import cv2

In [12]:
# Run a pretrained YOLOv8-nano detector on a single image and save the
# annotated result to disk.
model = YOLO('yolov8n.pt')  # already-trained checkpoint, no training needed here
results = model('imagine.jpg')

# Results.plot() returns the annotated image as a numpy array in BGR channel
# order (OpenCV convention).
processed_image = results[0].plot()

output_path = 'outputs/imagine_detectata.jpg'
# Make sure the target folder exists so save() does not fail on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# PIL interprets a 3-channel array as RGB, so reverse the channel axis first;
# without this the saved file has red and blue swapped.
Image.fromarray(processed_image[..., ::-1]).save(output_path)


image 1/1 /home/lolluckestar/Desktop/M1/Computer-Vision/Lab6/imagine.jpg: 448x640 43 cars, 1 bus, 2 trucks, 53.6ms
Speed: 1.5ms preprocess, 53.6ms inference, 0.8ms postprocess per image at shape (1, 3, 448, 640)


In [None]:
# Detect objects in one image, count how many of them are persons, and save
# the annotated image.
model = YOLO('yolov8n.pt')

results = model('download.jpeg')
detections = results[0].boxes.data  # one row per detected box

# Class index 0 is used for "person" (index in the Common Objects in Context
# label set, per the original author's note).
person_class = 0
# Column 5 of each row holds the class index; count the rows matching person_class.
num_persons = sum(1 for det in detections if int(det[5]) == person_class)

print(f"Numărul de persoane detectate: {num_persons}")

# Results.plot() returns a BGR numpy array (OpenCV convention).
processed_image = results[0].plot()
output_path = 'outputs/imagine_persoana_detectata.jpg'
# Create the output folder if needed so the save below cannot fail on a
# missing directory.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# PIL expects RGB: reverse the channel axis, otherwise red and blue are
# swapped in the saved file.
Image.fromarray(processed_image[..., ::-1]).save(output_path)


image 1/1 /home/lolluckestar/Desktop/M1/Computer-Vision/Lab6/download.jpeg: 640x448 3 persons, 1 tie, 68.2ms
Speed: 2.5ms preprocess, 68.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 448)
Numărul de persoane detectate: 3


In [None]:
from operator import itemgetter

# Run YOLOv8 on every frame of a video, draw only the 4 most confident
# detections per frame, write the annotated frames to output.mp4 and preview
# them in an OpenCV window (press "q" to stop early).
model = YOLO('yolov8n.pt')
videoPath = 'crowd.mp4'

cap = cv2.VideoCapture(videoPath)  # reader for the input clip
# isOpened is a method and must be called; the bare attribute is always truthy,
# so a missing or unreadable file would otherwise go unnoticed.
if not cap.isOpened():
    raise IOError(f"Could not open video: {videoPath}")

fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # four-character codec code
# Keep the source frame rate as a float (int() would turn 29.97 into 29).
# Some containers report 0; fall back to 30 so the writer gets a usable rate.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('output.mp4', fourcc, fps, (frame_width, frame_height))

try:
    while True:
        ret, frame = cap.read()
        if not ret:  # ret is False once there are no more frames to read
            break

        prediction = model(frame)
        detection = prediction[0].boxes.data

        if len(detection) > 0:
            # Convert once to plain Python rows, then sort by column 4
            # (confidence), highest first.
            detections = sorted(detection.tolist(), key=itemgetter(4), reverse=True)
            top_detections = detections[:4]  # keep only 4 bounding boxes

            for x1, y1, x2, y2, conf, cls in top_detections:
                label = f"{model.names[int(cls)]} {conf:.2f}"  # class name + confidence
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
                cv2.putText(frame, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX,
                            0.5, (0, 255, 0), 2)
        out.write(frame)

        cv2.imshow('YOLO Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the reader, the writer and the preview window, even if
    # inference or drawing raised part-way through the video.
    cap.release()
    out.release()
    cv2.destroyAllWindows()





0: 384x640 18 persons, 19.9ms
Speed: 1.8ms preprocess, 19.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 persons, 19.3ms
Speed: 1.5ms preprocess, 19.3ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 15 persons, 19.3ms
Speed: 1.5ms preprocess, 19.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 20 persons, 19.4ms
Speed: 2.0ms preprocess, 19.4ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 19.4ms
Speed: 2.1ms preprocess, 19.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 25 persons, 19.3ms
Speed: 1.5ms preprocess, 19.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 1 bus, 19.3ms
Speed: 1.6ms preprocess, 19.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 20 persons, 19.3ms
Speed: 1.9ms preprocess, 19.3ms inference, 1.1ms postprocess per i

In [1]:
import cv2
import os
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

# Setup for the tracking cell: load the segmentation model, collect the input
# frames, open the output video writer and initialise the tracking state.
model = YOLO("yolo11n-seg.pt")

frames_path = "data/images"
output_folder = "output5"
output_video = "tracked_video.avi"

os.makedirs(output_folder, exist_ok=True)

# Collect every PNG in the frames folder (extension matched case-insensitively).
# sorted() orders the paths lexicographically, so frame numbers in the file
# names must be zero-padded for the frames to come out in temporal order.
frame_files = sorted(
    os.path.join(frames_path, f)
    for f in os.listdir(frames_path)
    if f.lower().endswith(".png")
)
if not frame_files:
    raise FileNotFoundError(f"No PNG frames found in {frames_path}")

# The first frame fixes the output video size. cv2.imread returns None on
# failure instead of raising, so check explicitly.
im0 = cv2.imread(frame_files[0])
if im0 is None:
    raise IOError(f"Could not read frame: {frame_files[0]}")
h, w = im0.shape[:2]
fps = 30

out = cv2.VideoWriter(output_video, cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))

# State for the single vehicle being followed: its track id and the first and
# last frame index where it was seen.
tracked_vehicle_id = None
entry_frame = None
exit_frame = None

# Evaluation counters and threshold; not used by the code visible in this cell.
total_objects = 0
correct_detections = 0
iou_threshold = 0.5

def compute_iou(box1, box2):
    """Return the intersection-over-union of two boxes.

    Each box is a four-element sequence unpacked as (x1, y1, x2, y2).
    Returns 0 when the union area is zero, otherwise
    intersection area / union area.
    """
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    # Overlap extent along each axis, clamped at 0 when the boxes do not meet.
    overlap_w = max(0, min(ax2, bx2) - max(ax1, bx1))
    overlap_h = max(0, min(ay2, by2) - max(ay1, by1))
    overlap = overlap_w * overlap_h

    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    combined = area_a + area_b - overlap

    # Guard against division by zero for two zero-area boxes.
    return overlap / combined if combined != 0 else 0

# Track objects across all frames, highlight the followed vehicle, save each
# annotated frame as a PNG and append it to the output video.
try:
    for frame_idx, frame_file in enumerate(frame_files):
        im0 = cv2.imread(frame_file)
        if im0 is None:
            # cv2.imread returns None on failure instead of raising; skip the
            # frame rather than passing None to the annotator and tracker.
            print(f"Frame {frame_idx} could not be read, skipping: {frame_file}")
            continue

        annotator = Annotator(im0, line_width=2)

        # persist=True keeps tracker state between calls so ids carry over
        # from one frame to the next.
        results = model.track(im0, persist=True)
        result = results[0]

        if result.boxes.id is not None and result.masks is not None:
            boxes = result.boxes.xyxy.cpu().numpy()  # not used further in this loop
            track_ids = result.boxes.id.int().cpu().tolist()
            class_ids = result.boxes.cls.int().cpu().tolist()
            masks = result.masks.xy

            for mask, track_id, class_id in zip(masks, track_ids, class_ids):
                # Mark the tracked vehicle: the first time track id 1 shows
                # up it becomes the followed object (preferred track_id).
                if tracked_vehicle_id is None and track_id == 1:
                    tracked_vehicle_id = track_id
                    entry_frame = frame_idx

                color = colors(int(track_id), True)
                if track_id == tracked_vehicle_id:
                    txt_color = annotator.get_txt_color(color)
                    annotator.seg_bbox(mask=mask, mask_color=color, label=f"Tracked {track_id}", txt_color=txt_color)
                    # Overwritten on every frame where the followed id is
                    # seen, so it ends up as the last such frame.
                    exit_frame = frame_idx
                else:
                    annotator.seg_bbox(mask=mask, mask_color=color, label=str(track_id))

        output_file = os.path.join(output_folder, f"frame_{frame_idx:04d}.png")
        cv2.imwrite(output_file, im0)

        out.write(im0)

        print(f"Frame {frame_idx} procesat și salvat în {output_file}")
finally:
    # Release the writer even if tracking fails part-way through.
    out.release()

print("Procesarea s-a încheiat.")
if tracked_vehicle_id is not None:
    print(f"Tracked vehicle {tracked_vehicle_id}: Entered at frame {entry_frame}, exited at frame {exit_frame}")
else:
    print("No vehicle was tracked.")

# Replay the rendered video in an OpenCV window; pressing "q" stops playback.
cap = cv2.VideoCapture(output_video)
if cap.isOpened():
    ret, frame = cap.read()
    while ret:
        cv2.imshow("Tracked Video", frame)
        # Wait 30 ms between frames and poll the keyboard for "q".
        if cv2.waitKey(30) & 0xFF == ord("q"):
            break
        ret, frame = cap.read()
else:
    print("Eroare la deschiderea videoclipului rezultat!")
cap.release()
cv2.destroyAllWindows()



0: 384x640 8 cars, 62.0ms
Speed: 4.9ms preprocess, 62.0ms inference, 250.2ms postprocess per image at shape (1, 3, 384, 640)
Frame 0 procesat și salvat în output5/frame_0000.png

0: 384x640 6 cars, 1 truck, 27.6ms
Speed: 2.0ms preprocess, 27.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Frame 1 procesat și salvat în output5/frame_0001.png

0: 384x640 7 cars, 27.4ms
Speed: 1.8ms preprocess, 27.4ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)
Frame 2 procesat și salvat în output5/frame_0002.png

0: 384x640 7 cars, 27.5ms
Speed: 1.9ms preprocess, 27.5ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)
Frame 3 procesat și salvat în output5/frame_0003.png

0: 384x640 7 cars, 27.4ms
Speed: 1.9ms preprocess, 27.4ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Frame 4 procesat și salvat în output5/frame_0004.png

0: 384x640 7 cars, 27.5ms
Speed: 1.8ms preprocess, 27.5ms inference, 2.3ms postprocess per image at sha