In [3]:
# Install ultralytics (no version is pinned here; the recorded run resolved ultralytics-8.2.28)
!pip install ultralytics
# Install CLIP straight from the OpenAI GitHub repository (no commit/tag is pinned)
!pip install git+https://github.com/openai/CLIP.git

Collecting ultralytics
  Downloading ultralytics-8.2.28-py3-none-any.whl (779 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/779.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m256.0/779.6 kB[0m [31m8.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.6/779.6 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=0.2.5 (from ultralytics)
  Downloading ultralytics_thop-0.2.7-py3-none-any.whl (25 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached

In [4]:
from ultralytics import YOLO
import os
import locale
import subprocess
from google.colab import drive

# Mount Google Drive at /content/drive; the weight and video paths used later live under it.
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Report the attached GPU by running `nvidia-smi`.
# check_output raises (instead of returning text) when the binary is missing
# or exits non-zero, so those cases are caught and treated as "no GPU"
# rather than crashing the cell.
try:
    gpu_info = subprocess.check_output(["nvidia-smi"]).decode("utf-8")
except (OSError, subprocess.CalledProcessError):
    gpu_info = ""

# An empty string means nvidia-smi could not be run; 'failed' in the text
# means it ran but reported a failure.
if not gpu_info or gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Fri Jun  7 06:37:22 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA L4                      Off | 00000000:00:03.0 Off |                    0 |
| N/A   50C    P8              12W /  72W |      1MiB / 23034MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [6]:
# Not the last expression of the cell, so the working directory is computed
# but not displayed.
os.getcwd()
# Force the reported preferred encoding to UTF-8.
# The replacement keeps the stdlib signature (optional `do_setlocale`
# argument) so callers that invoke getpreferredencoding(False) do not fail
# with a TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

### **모델 추론**
#### 사전에 학습한 모델 가중치('best.pt') 불러오기

In [8]:
import cv2
import json
import numpy as np
import shutil
from google.colab import files
import os
from ultralytics import YOLO

# Load the YOLO-World model with a fixed (offline) custom vocabulary.
BASE_WEIGHTS = "/content/drive/MyDrive/Colab_Notebooks/Addinedu/Crutches_and_wheelchairs_ver6_by_yolov8l-worldv2_best.pt"
CUSTOM_WEIGHTS = "/content/drive/MyDrive/Colab_Notebooks/Addinedu/Crutches_and_wheelchairs_ver6_by_yolov8l-worldv2_best_set_classes.pt"

# Build the custom-vocabulary checkpoint only when it does not exist yet:
# load the trained weights, define the custom classes, and save the result.
# Previously the model was configured and then discarded without being saved,
# so a fresh environment had no `_set_classes.pt` file to load.
if not os.path.exists(CUSTOM_WEIGHTS):
    base_model = YOLO(BASE_WEIGHTS)
    # Define custom classes
    base_model.set_classes(['stick_user', 'person_no', 'wheelchair_user'])
    # Save the model with the defined offline vocabulary
    base_model.save(CUSTOM_WEIGHTS)

# The model used for inference is always the saved custom-vocabulary checkpoint.
model = YOLO(CUSTOM_WEIGHTS)

# Drawing color for each class name (comments give the color in OpenCV's BGR order).
class_colors = dict(
    stick_user=(0, 0, 255),         # red
    person_no=(255, 0, 0),          # blue
    wheelchair_user=(255, 0, 255),  # pink
)

def filter_boxes(results):
    """Keep only the highest-confidence box of each class, per result.

    Args:
        results: iterable of prediction results. Each must expose
            ``boxes.xyxy``, ``boxes.conf`` and ``boxes.cls`` (objects
            supporting ``.cpu().numpy()``) and a ``names`` mapping.

    Returns:
        A list with one entry per result. Each entry is a list of
        ``(box, score, label, names)`` tuples, one per distinct class label
        found in that result, ordered by ascending label value.
    """
    per_result = []
    for res in results:
        xyxy = res.boxes.xyxy.cpu().numpy()
        conf = res.boxes.conf.cpu().numpy()
        cls_ids = res.boxes.cls.cpu().numpy()
        id_to_name = res.names

        kept = []
        for cls_id in np.unique(cls_ids):
            # Indices of all detections of this class, then the one with the
            # top score among them (first one wins on ties).
            candidates = np.flatnonzero(cls_ids == cls_id)
            top = candidates[np.argmax(conf[candidates])]
            kept.append((xyxy[top], conf[top], cls_id, id_to_name))

        per_result.append(kept)
    return per_result

def draw_boxes(filtered_results, image):
    """Draw labelled bounding boxes onto ``image`` in place.

    Args:
        filtered_results: list of per-result lists of
            ``(box, score, label, names)`` tuples, as produced by
            ``filter_boxes``.
        image: the image array passed to the OpenCV drawing calls; it is
            modified directly.

    Detections whose class name has no entry in ``class_colors`` are skipped.
    """
    for detections in filtered_results:
        for xyxy, conf, cls_id, id_to_name in detections:
            left, top, right, bottom = (int(v) for v in xyxy)
            name = id_to_name[int(cls_id)]
            if name not in class_colors:
                continue

            bgr = class_colors[name]
            caption = f"{name} {conf:.2f}"
            cv2.rectangle(image, (left, top), (right, bottom), bgr, 2)
            # Caption is placed 10 px above the box's top-left corner.
            cv2.putText(image, caption, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, bgr, 2)

def save_json(filename, detected_objects):
    """Write the detections for one image to a JSON file.

    The JSON file is written to ``filename`` with its extension replaced by
    ``.json``. Only the final extension is swapped, so a ``.jpg`` appearing
    in a directory name is left alone, and a path that does not end in
    ``.jpg`` still gets a separate ``.json`` file instead of being
    overwritten.

    Args:
        filename: path of the image the detections belong to; it is stored
            verbatim under ``image.filename`` in the JSON.
        detected_objects: JSON-serializable detections, stored under
            ``image.detected_objects``.
    """
    data = {
        "image": {
            "filename": filename,
            "detected_objects": detected_objects
        }
    }
    json_path = os.path.splitext(filename)[0] + '.json'
    # Explicit encoding so the output does not depend on the process locale.
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)

if __name__ == "__main__":
    # Input video and destination of the annotated output video.
    video_path = '/content/drive/MyDrive/Colab_Notebooks/Addinedu/bus_crutch.mp4'
    output_video_path = '/content/drive/MyDrive/Colab_Notebooks/Addinedu/predicted_bus_crutch.mp4'

    # Open the input first and fail loudly if it cannot be read; otherwise the
    # loop below would run zero times and the cell would still report success.
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video: {video_path}")

    # Directory for raw frames with detections: the output video path without
    # its extension, plus '/output_frames'.
    output_frames_dir = os.path.splitext(output_video_path)[0] + '/output_frames'
    os.makedirs(output_frames_dir, exist_ok=True)

    # Directory for the per-frame detection JSON files (zipped at the end).
    # NOTE(review): files left here by earlier runs are also included in the
    # archive -- confirm whether the directory should be cleared first.
    output_json_dir = '../data/detection'
    os.makedirs(output_json_dir, exist_ok=True)

    # Every frame is resized to this size before inference and writing.
    frame_width = 640
    frame_height = 640

    # Keep the source frame rate as a float (truncating e.g. 29.97 to 29
    # changes the output duration); fall back when it is not reported.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30.0

    # Writer for the annotated output video.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, (frame_width, frame_height))
            results = model.predict(frame, save=False, conf=0.5)

            filtered_results = filter_boxes(results)

            # Per-frame detections: {class_name: {"idN": [x1, y1, x2, y2]}}.
            detected_objects = {}
            for result in filtered_results:
                for box, score, label, names in result:
                    class_name = names[int(label)]
                    if class_name in class_colors:
                        if class_name not in detected_objects:
                            detected_objects[class_name] = {}
                        bbox_id = f"id{len(detected_objects[class_name]) + 1}"
                        detected_objects[class_name][bbox_id] = [int(box[0]), int(box[1]), int(box[2]), int(box[3])]

            # Only frames with at least one detection are exported. The frame
            # is saved before boxes are drawn, so the JPEG is unannotated.
            if detected_objects:
                frame_filename = os.path.join(output_frames_dir, f"frame{frame_count:04d}.jpg")
                json_frame_filename = os.path.join(output_json_dir, f"frame{frame_count:04d}.jpg")
                cv2.imwrite(frame_filename, frame)
                save_json(json_frame_filename, detected_objects)

            draw_boxes(filtered_results, frame)
            out.write(frame)
            frame_count += 1
    finally:
        # Release the capture and finalize the output file even if a frame fails.
        cap.release()
        out.release()

    cv2.destroyAllWindows()

    print(f"Processed video saved to {output_video_path}")
    print(f"Processed frames saved to {output_frames_dir}")

    # Zip the JSON directory; make_archive returns the path of the archive it
    # created ('/content/detection.zip').
    output_filename = shutil.make_archive('/content/detection', 'zip', output_json_dir)

    # Download the archive through the Colab browser session.
    files.download(output_filename)


100%|████████████████████████████████████████| 338M/338M [00:03<00:00, 109MiB/s]



0: 640x640 1 stick_user, 15.3ms
Speed: 1.7ms preprocess, 15.3ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 stick_user, 15.5ms
Speed: 1.8ms preprocess, 15.5ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 15.5ms
Speed: 1.5ms preprocess, 15.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 15.7ms
Speed: 1.5ms preprocess, 15.7ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 stick_user, 15.2ms
Speed: 1.8ms preprocess, 15.2ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 stick_user, 24.1ms
Speed: 1.4ms preprocess, 24.1ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 stick_user, 15.6ms
Speed: 1.5ms preprocess, 15.6ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 stick_user, 15.6ms
Speed: 2.2ms preprocess, 15.6ms inference, 1.2ms po

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>