In [None]:
# @title Загрузка библиотек
!pip install -q 'git+https://github.com/facebookresearch/segment-anything.git'  #sam
!pip install ultralytics                                                        #YOLOv8

In [3]:
!wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth   #Загрузка весов для sam

In [4]:
# @title Check GPU availability
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


In [5]:
import numpy as np
import torch
import matplotlib
import matplotlib.pyplot as plt
import cv2
import os
from pycocotools import mask as mask_utils
import json

In [6]:
# @title Helper functions from SAM: https://github.com/facebookresearch/segment-anything/blob/main/notebooks/predictor_example.ipynb
def show_mask(mask, ax, random_color=False):
    """Render a segmentation mask on ``ax`` as a semi-transparent RGBA overlay.

    Args:
        mask: Array whose last two dimensions are (height, width). It is
            reshaped to (height, width, 1), so any leading dimensions must
            have size 1.
        ax: Object exposing ``imshow`` (e.g. a matplotlib Axes).
        random_color: If true, draw the RGB part of the colour from
            ``np.random.random``; otherwise use a fixed blue. Alpha is 0.6
            in both cases.
    """
    rgb = np.random.random(3) if random_color else np.array([30/255, 144/255, 255/255])
    rgba = np.concatenate([rgb, np.array([0.6])], axis=0)
    height, width = mask.shape[-2:]
    # Broadcasting (H, W, 1) * (1, 1, 4) paints masked pixels with the colour
    # and leaves every other pixel fully transparent (all zeros).
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)

def show_points(coords, labels, ax, marker_size=375):
    """Scatter prompt points on ``ax``: label 1 in green, label 0 in red.

    Args:
        coords: Array indexed as ``coords[:, 0]`` (x) and ``coords[:, 1]`` (y).
        labels: Array aligned with ``coords``; used as a boolean selector via
            ``labels == 1`` and ``labels == 0``.
        ax: Object exposing ``scatter`` (e.g. a matplotlib Axes).
        marker_size: Marker size passed to ``scatter`` as ``s``.
    """
    positives = coords[labels == 1]
    negatives = coords[labels == 0]
    star_style = dict(marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    for selected, colour in ((positives, 'green'), (negatives, 'red')):
        ax.scatter(selected[:, 0], selected[:, 1], color=colour, **star_style)

def show_box(box, ax):
    """Outline a box on ``ax`` with a green, unfilled rectangle.

    Args:
        box: Sequence indexed as ``[x0, y0, x1, y1]`` (two opposite corners).
        ax: Object exposing ``add_patch`` (e.g. a matplotlib Axes).
    """
    left, top, right, bottom = box[0], box[1], box[2], box[3]
    outline = plt.Rectangle(
        (left, top),
        right - left,
        bottom - top,
        edgecolor='green',
        facecolor=(0,0,0,0),  # fully transparent fill: only the border is drawn
        lw=2,
    )
    ax.add_patch(outline)

In [11]:
# @title Draw a bounding box around the tiger and segment it with SAM
def segment_pic(file, sam):
    """Detect the target object in one training image, segment it with SAM and save the results.

    Reads ``./train/{file}``, runs the notebook-level YOLO ``model`` restricted
    to class 22 and, for the first detected box of each result:

    * prompts SAM with that box on the clean (un-annotated) RGB image;
    * saves a figure with the labelled box and the mask overlay to
      ``./out/{stem}.png``;
    * writes a COCO-style annotation to ``./out_json/out_{stem}.json``.

    Relies on names defined in other notebook cells: ``model`` (YOLO detector),
    ``SamPredictor``, ``show_mask`` and ``show_box``.

    Args:
        file: File name (not a path) of an image inside ``./train``.
        sam: SAM model instance handed to ``SamPredictor``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``./train/{file}``.
    """
    # Class 22 is 'zebra' in the detector's class list, while the drawn label
    # says "tiger". NOTE(review): presumably zebra serves as a proxy class for
    # tigers because the pretrained model has no tiger class - confirm.
    target_class = 22

    image_bgr = cv2.imread(f"./train/{file}")
    if image_bgr is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image ./train/{file}")

    # splitext copes with file names containing several dots (or none).
    out_file = os.path.splitext(file)[0]
    try:
        image_id = int(out_file)
    except ValueError:
        # Non-numeric file names keep their stem as the image id.
        image_id = out_file

    os.makedirs("./out", exist_ok=True)
    os.makedirs("./out_json", exist_ok=True)

    # The detector receives the OpenCV (BGR) array; SAM and matplotlib expect RGB.
    objects = model(image_bgr, save=True, classes=[target_class])
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

    for result in objects:
        boxes = result.boxes
        cls = boxes.cls
        if len(cls) == 0 or int(cls[0]) != target_class:
            continue

        # Only the first detection is used.
        x1, y1, x2, y2 = boxes.xyxy[0].tolist()
        input_box = np.array([x1, y1, x2, y2])

        # Segment on the clean image so the drawn box/label cannot leak into the mask.
        predictor = SamPredictor(sam)
        predictor.set_image(image_rgb)
        masks, _, _ = predictor.predict(
            point_coords=None,
            point_labels=None,
            box=input_box[None, :],
            multimask_output=False
        )

        # Binary (H, W) uint8 mask for OpenCV contour extraction.
        binary_mask = masks[0].astype(np.uint8)
        contours, _ = cv2.findContours(binary_mask,
                                       cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            print(f"No mask contour found for {file}; skipping")
            continue
        largest_contour = max(contours, key=cv2.contourArea)
        polygon = largest_contour.flatten().tolist()

        coco_data = {
            "annotations":
             {
                "id": 1,
                "image_id": image_id,
                "category_id": 1,
                # COCO polygons are a list of flat [x, y, x, y, ...] lists.
                "segmentation": [polygon],
                "area": int(masks[0].sum()),
                # COCO boxes are [x, y, width, height].
                "bbox": [x1, y1, x2 - x1, y2 - y1],
                "iscrowd": 0
            }
        }
        coco_list = [coco_data]

        # Draw the labelled box on a copy that is used for display only (RGB red).
        annotated = image_rgb.copy()
        cv2.rectangle(annotated, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)
        text = "tiger"
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 1.5
        thickness = 4
        text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
        text_x = int(x1 + 5)
        text_y = int(y1 + text_size[1] + 5)
        cv2.putText(annotated, text, (text_x, text_y), font, font_scale, (255, 0, 0), thickness)

        fig, ax = plt.subplots(figsize=(10, 10))
        ax.imshow(annotated)
        show_mask(masks[0], ax)
        show_box(input_box, ax)
        ax.axis("off")
        fig.savefig(f"./out/{out_file}.png")
        plt.show()
        # Release the figure: this function is called once per image in a loop.
        plt.close(fig)

        with open(f"./out_json/out_{out_file}.json", "w") as f:
            json.dump(coco_list, f)

In [None]:
# @title Display the source image
image_bgr = cv2.imread("000032.jpg")
if image_bgr is None:
    # cv2.imread returns None (it does not raise) when the file is missing or unreadable.
    raise FileNotFoundError("Could not read 000032.jpg from the current working directory")
# OpenCV loads images as BGR; matplotlib expects RGB.
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(20,20))
plt.imshow(image)
plt.axis("off")
plt.show()

In [8]:
# @title Load the YOLOv8s model
from ultralytics import YOLO
# Load the small detection model
model = YOLO("yolov8s.pt")

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt to 'yolov8s.pt'...
100%|██████████| 21.5M/21.5M [00:00<00:00, 241MB/s]


In [None]:
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamPredictor

sam_checkpoint = "sam_vit_h_4b8939.pth"  # checkpoint file name
model_type = "vit_h"  # key into sam_model_registry
# Use the GPU when one is available (much faster); otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
# Trailing ';' keeps the cell from printing the full model repr.
sam.to(device=device);

In [12]:
# Agg is a non-interactive backend: figures are rendered to files only.
matplotlib.use("Agg")
image_extensions = {".jpg", ".jpeg", ".png"}
# sorted() gives a stable processing order (os.listdir order is arbitrary);
# splitext + lower() also accepts upper-case extensions and ignores names without one.
for filename in sorted(os.listdir('train')):
    if os.path.splitext(filename)[1].lower() in image_extensions:
        segment_pic(filename, sam)


0: 416x640 1 zebra, 287.3ms
Speed: 3.6ms preprocess, 287.3ms inference, 0.8ms postprocess per image at shape (1, 3, 416, 640)
Results saved to [1mruns/detect/predict[0m


In [None]:
# @title Available detection classes
# Run the detector on one sample image and keep the first result
result = model.predict("./train/000037.jpg")
result = result[0]

# Print the class-id -> class-name mapping carried by the result
print(result.names)


image 1/1 /content/train/000037.jpg: 320x640 1 zebra, 16.1ms
Speed: 2.2ms preprocess, 16.1ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)


{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microw