In [None]:
# ultralytics 설치
!pip install ultralytics
# CLIP 설치
!pip install git+https://github.com/openai/CLIP.git

In [None]:
from ultralytics import YOLO
import os
import locale
import subprocess
from google.colab import drive

# Mount Google Drive; the model weights and the input video are read from
# paths under /content/drive/MyDrive later in this notebook.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Query the GPU through nvidia-smi. check_output raises when the binary is
# missing (OSError) or exits non-zero (CalledProcessError), so both cases are
# mapped to "no GPU" instead of crashing the cell.
try:
  gpu_info = subprocess.check_output(["nvidia-smi"]).decode("utf-8")
except (OSError, subprocess.CalledProcessError):
  gpu_info = None

# The 'failed' substring check is kept for runs where nvidia-smi exits
# successfully but still reports a failure in its output.
if gpu_info is None or 'failed' in gpu_info:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
# Force the reported preferred encoding to UTF-8. The replacement keeps the
# standard-library signature (optional ``do_setlocale``) so callers that use
# ``locale.getpreferredencoding(False)`` do not fail with a TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

# Current working directory; placed last so the notebook displays the value.
os.getcwd()

### **모델 추론**
#### 사전에 학습한 모델 가중치('best.pt') 불러오기

In [None]:
import cv2
import json
import numpy as np
import shutil
from google.colab import files
import os
from ultralytics import YOLO

# Load the first model (used for the stick_user and wheelchair_user classes).
# The weights file is read from the same Drive folder as the input video; the
# directory prefix must appear only once in the path.
model1 = YOLO("/content/drive/MyDrive/Colab_Notebooks/Addinedu/Crutches_and_wheelchairs_ver6_by_yolov8l-worldv2_best_set_classes.pt")

# Load the second model (used for the person class) and restrict its
# vocabulary to that single class.
model2 = YOLO("yolov8l-worldv2.pt")
model2.set_classes(["person"])

# Drawing colour per class name, as (B, G, R) tuples passed to OpenCV.
class_colors = dict(
    stick_user=(0, 0, 255),         # red
    person=(255, 0, 0),             # blue
    person_no=(255, 0, 0),          # blue
    wheelchair_user=(255, 0, 255),  # pink / magenta
)

# Identifiers for the camera positions.
camera_ids = list('ABC')

def filter_boxes(results):
    """Keep only the highest-confidence box of each class in every result.

    Args:
        results: Iterable of prediction results. Each item must expose
            ``boxes.xyxy``, ``boxes.conf`` and ``boxes.cls`` (convertible via
            ``.cpu().numpy()``) and a ``names`` mapping.

    Returns:
        A list with one entry per result. Each entry is a list of
        ``(box, score, label, names)`` tuples, one per distinct class label,
        ordered by ascending label value.
    """
    per_result = []
    for res in results:
        xyxy = res.boxes.xyxy.cpu().numpy()
        conf = res.boxes.conf.cpu().numpy()
        cls_ids = res.boxes.cls.cpu().numpy()
        names = res.names

        winners = []
        for cls_id in np.unique(cls_ids):
            # Positions of this class in the original arrays, then the
            # position with the largest confidence among them.
            candidates = np.flatnonzero(cls_ids == cls_id)
            top = candidates[np.argmax(conf[candidates])]
            winners.append((xyxy[top], conf[top], cls_id, names))

        per_result.append(winners)
    return per_result

def draw_boxes(filtered_results, image):
    """Draw labelled rectangles on ``image`` for classes that have a colour.

    Args:
        filtered_results: Nested lists of ``(box, score, label, names)``
            tuples, as produced by ``filter_boxes``.
        image: Image array modified in place.

    Detections whose class name is not a key of ``class_colors`` are skipped.
    """
    for detections in filtered_results:
        for box, score, label, names in detections:
            left, top, right, bottom = (int(v) for v in box)
            class_name = names[int(label)]
            if class_name not in class_colors:
                continue

            color = class_colors[class_name]
            caption = f"{class_name} {score:.2f}"
            cv2.rectangle(image, (left, top), (right, bottom), color, 2)
            # The caption sits 10 px above the top-left corner of the box.
            cv2.putText(
                image,
                caption,
                (left, top - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                color,
                2,
            )

def remove_detected_objects(frame, filtered_results):
    """Return a copy of ``frame`` with stick/wheelchair user boxes blacked out.

    Args:
        frame: Image array; only its first two dimensions size the mask.
        filtered_results: Nested lists of ``(box, score, label, names)``
            tuples, as produced by ``filter_boxes``.

    Returns:
        The result of ``cv2.bitwise_and`` of the frame with itself under a
        mask that is 0 inside every 'stick_user' / 'wheelchair_user' box and
        255 elsewhere.
    """
    blanked_classes = ('stick_user', 'wheelchair_user')

    # Start from an all-white (keep everything) mask.
    keep_mask = np.full(frame.shape[:2], 255, dtype=np.uint8)

    for detections in filtered_results:
        for box, _score, label, names in detections:
            if names[int(label)] not in blanked_classes:
                continue
            left, top, right, bottom = (int(v) for v in box)
            keep_mask[top:bottom, left:right] = 0  # black out this detection

    return cv2.bitwise_and(frame, frame, mask=keep_mask)

def save_json(filename, detected_objects):
    """Write the detections for one frame to a JSON file next to the image.

    Args:
        filename: Path of the frame image. It is stored verbatim in the JSON
            and its extension is swapped for ``.json`` to get the output path.
        detected_objects: JSON-serialisable detections for the frame.

    The output path is derived with ``os.path.splitext`` so that only the
    final extension changes: a ``.jpg`` substring elsewhere in the path is
    left alone, and an input without a ``.jpg`` extension can never resolve
    to the image's own path.
    """
    data = {
        "image": {
            "filename": filename,
            "detected_objects": detected_objects
        }
    }
    json_path = os.path.splitext(filename)[0] + '.json'
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)

def _collect_detections(filtered_results, wanted_classes, detected_objects):
    """Record boxes of the wanted classes into ``detected_objects`` in place.

    Args:
        filtered_results: Nested lists of ``(box, score, label, names)``
            tuples, as produced by ``filter_boxes``.
        wanted_classes: Class names to record; other classes are ignored.
        detected_objects: Dict of ``{class_name: {"idN": [x1, y1, x2, y2]}}``
            that is updated. Ids count up from ``id1`` within each class.
    """
    for result in filtered_results:
        for box, _score, label, names in result:
            class_name = names[int(label)]
            if class_name not in wanted_classes:
                continue
            per_class = detected_objects.setdefault(class_name, {})
            bbox_id = f"id{len(per_class) + 1}"
            per_class[bbox_id] = [int(box[0]), int(box[1]), int(box[2]), int(box[3])]


if __name__ == "__main__":
    # Input video and annotated output video paths.
    video_name = 'bus_wheelchair_crutch.mp4'
    video_path = f'/content/drive/MyDrive/Colab_Notebooks/Addinedu/{video_name}'
    output_video_path = f'/content/drive/MyDrive/Colab_Notebooks/Addinedu/predicted_{video_name}'

    # Directory for the saved frames.
    output_frames_dir = '../data/detection'
    os.makedirs(output_frames_dir, exist_ok=True)

    # Directory for the JSON annotations (currently the same location).
    output_json_dir = '../data/detection'
    os.makedirs(output_json_dir, exist_ok=True)

    # Open the video and fail loudly if it cannot be read; otherwise the loop
    # below would be skipped and empty outputs would be produced silently.
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise OSError(f"Could not open video: {video_path}")

    frame_width = 640
    frame_height = 640

    # Keep the fractional frame rate (e.g. 29.97) instead of truncating it,
    # and fall back to 30 fps when the container reports no usable rate.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30.0

    # Writer for the annotated output video.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, (frame_width, frame_height))

            # Predict with the first model and record its detections.
            results1 = model1.predict(frame, save=False, conf=0.5)
            filtered_results1 = filter_boxes(results1)

            detected_objects = {}
            _collect_detections(filtered_results1, ('stick_user', 'wheelchair_user'), detected_objects)

            # Black out what the first model found before running the second.
            frame_no_objects = remove_detected_objects(frame, filtered_results1)

            # Predict with the second model on the masked frame.
            # NOTE(review): filter_boxes keeps only the top-scoring box per
            # class, so at most one person is recorded per frame despite
            # max_det=100 — confirm this is intended.
            results2 = model2.predict(frame_no_objects, save=False, conf=0.5, iou=0.5, max_det=100)
            filtered_results2 = filter_boxes(results2)
            _collect_detections(filtered_results2, ('person',), detected_objects)

            # Save the un-annotated frame and its JSON only when something
            # was detected.
            if detected_objects:
                frame_name = f"{camera_ids[0]}_{video_name}_{frame_count:04d}.jpg"
                frame_filename = os.path.join(output_frames_dir, frame_name)
                json_frame_filename = os.path.join(output_json_dir, frame_name)
                cv2.imwrite(frame_filename, frame)
                save_json(json_frame_filename, detected_objects)

            # Draw the results of both models onto the frame for the video.
            draw_boxes(filtered_results1, frame)
            draw_boxes(filtered_results2, frame)

            out.write(frame)
            frame_count += 1
    finally:
        # Release the capture and writer even if prediction fails mid-video.
        cap.release()
        out.release()

    print(f"Processed video saved to {output_video_path}")
    print(f"Processed frames saved to {output_frames_dir}")

    # Zip the detection directory; make_archive returns the archive path
    # ('/content/detection.zip').
    output_filename = shutil.make_archive('/content/detection', 'zip', output_json_dir)

    # Download the archive through the Colab browser session.
    files.download(output_filename)