In [62]:
from pathlib import Path
import os
import pandas as pd


In [63]:
dir_path = Path(os.path.join('..','tmp'))
dataset_path = Path(os.path.join('..','dataset','objects'))
data_path = dataset_path.joinpath('data.csv')

### CVAT download dataset

In [64]:
import requests
import shutil
import sys
from glob import glob
import cv2
sys.path.insert(0, "..")
from zipfile import ZipFile

from io import BytesIO

In [65]:
URL = 'http://10.72.125.105:8080'
API_URL = '%s/api/v1' % URL
LOGIN = 'ml'
PASSWORD = 'NuAzu8mO'
PROJECT = 'Liza'

def get_session(url, username, password):
    session = requests.Session()
    response=session.post(url, data={'username':username,'password':password})
    response.raise_for_status()
    if 'csrftoken' in response.cookies:
        session.headers['X-CSRFToken'] = response.cookies['csrftoken']
    return session

def get_request(self, method, query=None):
    url = "%s/%s" % (API_URL,method)
    if ~(query is None):
        url+='?%s'%query
    request = self.get(url)
    if request.headers['Content-Type'] == 'application/json':
        return request.json()
    else:
        return request

In [66]:
url = '%s/auth/login' % API_URL
session = get_session(url, LOGIN, PASSWORD)
projects = get_request(session,'projects','?search=%s' % (PROJECT))
project= projects['results'][0]

In [67]:
labels=pd.DataFrame(project['labels'])

In [68]:
def makePath(path):
    if path.exists() and path.is_dir():
        shutil.rmtree(path)
    os.mkdir(path)

In [69]:
makePath(dir_path)
makePath(dataset_path)

for task in project['tasks']:
    task_id = task['id']
    chunk_size = task['data_chunk_size']
    annotations = get_request(session,'tasks/%s/annotations' % (task_id))
    shapes = pd.DataFrame(annotations['shapes'])
    chunk_number=0

    for chunk_n in range((shapes.frame.max() // chunk_size) + 1):
        start_chunk = chunk_n * chunk_size
        chunk_shapes = shapes[(shapes.frame >= start_chunk) & (shapes.frame < start_chunk+chunk_size)]

        if len(chunk_shapes) > 0:
            data=get_request(session,'tasks/%s/data' % task_id, 'type=chunk&number=%s&quality=original' % chunk_n)
            zipfile= ZipFile(BytesIO(data.content))
            zipfile.extractall(dir_path)

            for frame in chunk_shapes.frame.unique():
                filename = '%s.*' % str(1000000 + (frame-start_chunk))[1:]
                filename = glob(str(dir_path.joinpath(filename)))[0]
                image_id = '%s_%s' % (task_id,frame)
                new_filename = ('%s.%s' % (image_id, filename.split('.')[-1])).lower()
                shapes.loc[shapes['frame']==frame, 'image_path'] = new_filename
                shutil.move(dir_path.joinpath(filename),dataset_path.joinpath(new_filename))


shutil.rmtree(dir_path)

### Label objects

In [70]:
shapes['label']=shapes.apply(lambda x: labels[labels['id']==x.label_id].index[0], axis=1)

boxes=pd.DataFrame(shapes['points'].to_list(), columns=['x1', 'y1', 'x2', 'y2'])
shapes=pd.concat([shapes,boxes], axis=1)

images_path = dataset_path.joinpath('images')
labels_path = dataset_path.joinpath('labels')
makePath(images_path)
makePath(labels_path)

In [71]:
for image in shapes.image_path.unique():
    im=cv2.imread(str(dataset_path.joinpath(image)))
    shutil.move(dataset_path.joinpath(image),images_path.joinpath(image))
    coords = shapes[shapes['image_path']==image][['label','x1', 'y1', 'x2', 'y2']]
    width = im.shape[1]
    height = im.shape[0]
    
    txt_path = labels_path.joinpath('%s.txt' % image.split('.')[0])
    coords.apply(lambda x: pd.Series([
        x.label, 
        ((x.x2+x.x1)/2)/width, 
        (((x.y2+x.y1)/2)/height), 
        (x.x2-x.x1)/width, 
        (x.y2-x.y1)/height])
    , axis=1).to_csv(txt_path, sep=' ', index=None, header=None)

In [72]:
labels.name.unique()

array(['drons', 'auto', 'swimmer', 'horse', 'hug', 'sherp', 'aero', 'dog'],
      dtype=object)

### Train YOLOv5

In [None]:
!python3 ../yolov5/train.py --img 800 --batch 16 --epochs 3 --data liza.yaml --weights yolov5s.pt 

[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice: (30 second timeout) 
[34m[1mwandb[0m: W&B disabled due to login timeout.
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=liza.yaml, hyp=../yolov5/data/hyps/hyp.scratch-low.yaml, epochs=3, batch_size=16, imgsz=800, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=../yolov5/runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v6.1-261-g19f33cb Python-3.8.10 torch-1.10

In [3]:
import torch
torch.cuda.is_available()

True

### Evaluate model

In [4]:
model = torch.hub.load('ultralytics/yolov5', 'custom', path='../models/model-object-detect.pt') 

Using cache found in /home/Kirill.Fomenko/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-6-25 Python-3.8.10 torch-1.10.0+cu102 CUDA:0 (GRID P40-12Q, 12288MiB)

Fusing layers... 
Model summary: 367 layers, 46145973 parameters, 0 gradients, 107.8 GFLOPs
Adding AutoShape... 


In [9]:
from PIL import Image
im1 = Image.open('/data/images/LizaNew/2_5309971804389900991.JPG') 
im2 = Image.open('/data/images/LizaNew2/post_5d14b69439652.jpg') 
imgs = [im1, im2]
results = model(imgs, size=800)
results.print()
results.show()  # or .show()

image 1/2: 3024x4032 2 autos
image 2/2: 776x580 1 dog
Speed: 420.8ms pre-process, 31.9ms inference, 1.3ms NMS per image at shape (2, 3, 800, 800)


### Mark Objects and save in DB

In [14]:
import pandas as pd

In [100]:
conn = 'postgresql://liza:topsecret@novateam.ddns.net:5434/public'
sql_query = 'select * from "Media"'
sql_ds = pd.read_sql(sql_query, conn)
sql_ds.head()

Unnamed: 0,id,mediaName,mediaPath,mediaSize,mediaType,mediaPreviewUrl,title
0,4207,20220116_121426_01171.jpg,/LizaNew/20220116_121426_01171.jpg,43467,image,https://downloader.disk.yandex.ru/preview/8fe6...,
1,4208,20220116_121426_01136.jpg,/LizaNew/20220116_121426_01136.jpg,44751,image,https://downloader.disk.yandex.ru/preview/d892...,
2,4209,20220116_121426_01179.jpg,/LizaNew/20220116_121426_01179.jpg,43951,image,https://downloader.disk.yandex.ru/preview/67ec...,
3,4210,20220116_121426_01183.jpg,/LizaNew/20220116_121426_01183.jpg,43823,image,https://downloader.disk.yandex.ru/preview/cab8...,
4,4211,20220116_121426_01137.jpg,/LizaNew/20220116_121426_01137.jpg,44758,image,https://downloader.disk.yandex.ru/preview/5f3e...,


In [94]:
sql_query_2 = 'select * from "_MediaToTags"'
sql_media_to_tags = pd.read_sql(sql_query_2, conn)

In [None]:
mapping = {
    0: 12,
    1: 11,
    2: 13,
    3: 15,
    4: 16,
    5: 17,
    6: 10,
    7: 14
}

In [109]:
import os
from tqdm import tqdm
for mediaName in tqdm(sql_ds.mediaName.unique()):
    # mediaName=sql_ds.mediaName.unique()[3]
    media_id = sql_ds[sql_ds['mediaName']==mediaName]['id'].values[0]
    mediaPath=os.path.join('/data','images','LizaNew',mediaName)
    try:
        im=Image.open(mediaPath)
    except:
        print('Except with file %s, pass him' % (mediaPath))
        continue
    imgs = [im]
    results = model(imgs, size=800)
    tags=results.pandas().xyxy[0]['class'].unique()
    for tag in tags:
        try:
            tag_id = mapping[tag]
            pd.DataFrame([{'A':media_id, 'B': tag_id}]).to_sql('_MediaToTags',conn,if_exists='append',index=False)
        except:
            print('keys pair %s and %s already exist. Pass.' % (media_id, tag_id))

keys pair 4207 and 12 already exist. Pass.
keys pair 4208 and 12 already exist. Pass.
keys pair 4209 and 12 already exist. Pass.
keys pair 4210 and 12 already exist. Pass.
keys pair 4211 and 12 already exist. Pass.
keys pair 4212 and 12 already exist. Pass.
keys pair 4212 and 11 already exist. Pass.
keys pair 4213 and 12 already exist. Pass.
keys pair 4214 and 12 already exist. Pass.
keys pair 4215 and 12 already exist. Pass.
keys pair 4216 and 12 already exist. Pass.
keys pair 4216 and 10 already exist. Pass.
keys pair 4217 and 12 already exist. Pass.
keys pair 4218 and 12 already exist. Pass.
keys pair 4219 and 12 already exist. Pass.
keys pair 4221 and 11 already exist. Pass.
keys pair 4221 and 12 already exist. Pass.
keys pair 4222 and 11 already exist. Pass.
keys pair 4222 and 12 already exist. Pass.
keys pair 4223 and 11 already exist. Pass.
keys pair 4223 and 12 already exist. Pass.
keys pair 4223 and 10 already exist. Pass.
keys pair 4224 and 11 already exist. Pass.
keys pair 4