In [None]:
!pip install ultralytics
!cd /content
!pwd
!mkdir dataset
!unzip /content/drive/MyDrive/CV_Itmo_course/forest_fire.zip -d /content/dataset
!mv -v /content/dataset/train_fire/* /content/dataset/train-smoke

In [2]:
import os
from random import shuffle
from ultralytics import YOLO
import gc
from tqdm.notebook import tqdm
import cv2

In [3]:
IMAGE_DIR_PATH = f"/content/dataset/train-smoke"

In [4]:
def get_batch(path: str, batch_size: int) -> list:
  paths = [i for i in os.listdir(path)]
  shuffle(paths)
  for i in range(0, len(paths), batch_size):
    yield paths[i:min(i + batch_size, len(paths))]


In [5]:
HOME = os.getcwd()
print("HOME:", HOME)

HOME: /content


In [6]:
%cd {HOME}
!git clone https://github.com/IDEA-Research/GroundingDINO.git
%cd {HOME}/GroundingDINO
!pip install -q -e .

/content
Cloning into 'GroundingDINO'...
remote: Enumerating objects: 401, done.[K
remote: Counting objects: 100% (76/76), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 401 (delta 51), reused 44 (delta 44), pack-reused 325[K
Receiving objects: 100% (401/401), 12.84 MiB | 25.20 MiB/s, done.
Resolving deltas: 100% (204/204), done.
/content/GroundingDINO
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m57.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.7/254.7 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m89.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.0/295.0 kB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [3

In [7]:
!pip uninstall -y supervision
!pip install -q supervision==0.6.0

import supervision as sv
print(sv.__version__)

Found existing installation: supervision 0.6.0
Uninstalling supervision-0.6.0:
  Successfully uninstalled supervision-0.6.0
0.6.0


In [8]:
!mkdir /content/foundational_output

In [9]:
import torch

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [10]:
import os

GROUNDING_DINO_CONFIG_PATH = os.path.join(HOME, "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py")
print(GROUNDING_DINO_CONFIG_PATH, "; exist:", os.path.isfile(GROUNDING_DINO_CONFIG_PATH))

/content/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py ; exist: True


In [11]:
DATASET_DIR_PATH = f"/content/foundational_output"

In [12]:
%cd {HOME}
!mkdir -p {HOME}/weights
%cd {HOME}/weights

!wget -q https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth

/content
/content/weights


In [13]:
import os

GROUNDING_DINO_CHECKPOINT_PATH = os.path.join(HOME, "weights", "groundingdino_swint_ogc.pth")
print(GROUNDING_DINO_CHECKPOINT_PATH, "; exist:", os.path.isfile(GROUNDING_DINO_CHECKPOINT_PATH))

/content/weights/groundingdino_swint_ogc.pth ; exist: True


In [14]:
%cd {HOME}/GroundingDINO

from groundingdino.util.inference import Model

grounding_dino_model = Model(model_config_path=GROUNDING_DINO_CONFIG_PATH, model_checkpoint_path=GROUNDING_DINO_CHECKPOINT_PATH)

/content/GroundingDINO


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


final text_encoder_type: bert-base-uncased


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [15]:
from typing import List
import cv2

In [16]:
def enhance_class_name(class_names: List[str]) -> List[str]:
    return [
        f"all {class_name}s"
        for class_name
        in class_names
    ]

In [25]:
IMAGES_DIRECTORY = '/content/dataset/train-smoke'
IMAGES_EXTENSIONS = ['jpg', 'jpeg', 'png']

CLASSES = ['smoke', 'fire']
BOX_TRESHOLD = 0.35
TEXT_TRESHOLD = 0.35

In [26]:
import cv2
from tqdm.notebook import tqdm

annotations = {}
batch = get_batch(IMAGES_DIRECTORY, 1000)
for image_names in batch:
  for image_name in tqdm(image_names):

      image_path = IMAGES_DIRECTORY + '/' + image_name
      image = cv2.imread(image_path)

      detections = grounding_dino_model.predict_with_classes(
          image=image,
          classes=enhance_class_name(class_names=CLASSES),
          box_threshold=BOX_TRESHOLD,
          text_threshold=TEXT_TRESHOLD
      )
      detections = detections[detections.class_id != None]

      annotations[image_name] = detections
  gc.collect()

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/733 [00:00<?, ?it/s]

In [27]:
sum_of_det = 0
for i in annotations:
  sum_of_det += annotations[i].class_id.shape[0]
sum_of_det

12768

In [35]:
annotations['003308.jpg'].class_id

array([0, 1, 0])

In [None]:
annotations

In [63]:
yolo_annot = {}

In [64]:
for key in annotations:
  yolo_list = []
  print(key)
  img_size = cv2.imread(f'/content/dataset/train-smoke/{key}').shape
  for i in range(len(annotations[key].class_id)):
    x_mid = ((annotations[key].xyxy[i][0] + annotations[key].xyxy[i][2]) / 2)/img_size[1]
    y_mid = ((annotations[key].xyxy[i][1] + annotations[key].xyxy[i][3]) / 2)/img_size[0]
    height = (annotations[key].xyxy[i][3] - annotations[key].xyxy[i][1])/img_size[0]
    width = (annotations[key].xyxy[i][2] - annotations[key].xyxy[i][0])/img_size[1]
    det_class = annotations[key].class_id[i]
    yolo_list.append((det_class, x_mid, y_mid, width, height))
  yolo_annot[key] = yolo_list

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
000037.jpg
011763.jpg
002165.jpg
009026.jpg
001903.jpg
006638.jpg
009417.jpg
005403.jpg
002217.jpg
008458.jpg
009046.jpg
009136.jpg
fire-9959.22380616.png
008846.jpg
002891.jpg
008672.jpg
004152.jpg
000117.jpg
006627.jpg
008706.jpg
007032.jpg
006320.jpg
010621.jpg
006866.jpg
fire-3041.78410264.png
007109.jpg
009192.jpg
004488.jpg
001506.jpg
003436.jpg
007782.jpg
001684.jpg
005966.jpg
005941.jpg
005045.jpg
002339.jpg
fire-4599.12178968.png
009341.jpg
004676.jpg
000977.jpg
fire-5102.99291241.png
001128.jpg
fire-7544.77593813.png
008059.jpg
005459.jpg
fire-5468.46636932.png
004396.jpg
001322.jpg
001919.jpg
001113.jpg
008966.jpg
000132.jpg
004610.jpg
009576.jpg
009727.jpg
002289.jpg
010461.jpg
002587.jpg
007252.jpg
005054.jpg
002946.jpg
004228.jpg
fire-3183.92370808.png
010893.jpg
fire-2263.99444288.png
002086.jpg
011754.jpg
001806.jpg
002588.jpg
000277.jpg
004592.jpg
fire-3958.94569231.png
fire-3197.28352517

In [68]:
yolo_annot['010614.jpg']

[(0,
  0.78056640625,
  0.5021871496129919,
  0.09003346761067708,
  0.236226314968533)]

In [37]:
img = cv2.imread('/content/dataset/train-smoke/000007.jpg')

In [39]:
from google.colab.patches import cv2_imshow

In [58]:
len(annotations['003308.jpg'].class_id)

3

In [52]:
import numpy as np

In [70]:
test_file = open('/content/drive/MyDrive/ProjectDrones/labels/003308.txt', 'w+')

In [80]:
for key in yolo_annot:
  name = '/content/drive/MyDrive/ProjectDrones/labels' + '/' + key.split('.')[0] + '.' + 'txt'
  detection_file = open(name, 'w+')
  for detection in yolo_annot[key]:
    det_string = f'{detection[0]} {detection[1]} {detection[2]} {detection[3]} {detection[4]}'
    detection_file.write(det_string + '\n')
  detection_file.close()

In [71]:
'003308.jpg'.split('.')[0]

'003308'

xd

