# CREATING DATA SET

In [None]:
!unzip "data.zip"

In [2]:
import os
import re

for folder in ['train', 'valid','test']:
    file_list = os.listdir(f'{folder}')
    for file in file_list:
        if not file.endswith('.txt'):
            continue
        with open(f'./{folder}/{file}', 'r+') as f:
            lines = f.read()
            replaced = re.sub(r'0(?=\s\d+\.\d+){4}', '1', lines)
            replaced = re.sub(r'2(?=\s\d+\.\d+){4}', '0', lines)
            f.seek(0)
            f.write(replaced)
            f.truncate()

In [4]:
import shutil

In [5]:
for folder in ['train', 'valid','test']: 
    if not os.path.isdir(folder+'/images'):
      new_image_path = folder+'/images'
      !mkdir -p {new_image_path}
    if not os.path.isdir(folder+'/labels'):
      new_label_path = folder+'/label'
      !mkdir -p {new_label_path}
    for file in os.listdir(folder):
      if os.path.isfile(os.path.join(folder, file)):
        filename, file_extension = os.path.splitext(file)
        if file_extension=='.jpg':
          shutil.move(folder+'/'+file, folder+'/images/'+file)
        else:
          shutil.move(folder+'/'+file, folder+'/label/'+file)



In [14]:
len(os.listdir('test/images'))

42


# GETTING PERSON IMAGES FROM COCO

In [15]:
!rm COCO/labels/*
!rm COCO/images/*

rm: cannot remove 'COCO/labels/*': No such file or directory
rm: cannot remove 'COCO/images/*': No such file or directory


In [16]:
from pycocotools.coco import COCO
import requests


In [17]:
path = r'COCO/annotations/instances_val2014.json'

In [18]:
coco = COCO(path)

loading annotations into memory...
Done (t=4.85s)
creating index...
index created!


In [19]:
catIds = coco.getCatIds(catNms=['person'])
imgIds = coco.getImgIds(catIds=catIds)

In [20]:
images = coco.loadImgs(imgIds)

In [22]:
for im in images[0:300]: # select as many as you'll need
    img_data = requests.get(im['coco_url']).content
    with open('COCO/images/' + im['file_name'], 'wb') as handler:
        handler.write(img_data)

In [23]:
len(os.listdir('COCO/images'))

300

# FACE DETECTION YOLO

In [None]:
!pip install facenet-pytorch

In [25]:
from facenet_pytorch import MTCNN, extract_face
import matplotlib.image as mpl
import matplotlib.pyplot as plt
import os
import cv2

In [26]:
mtcnn = MTCNN(keep_all=True)

In [27]:
def convert_to_darknet(size, box):
    dw = 1./size[0]
    dh = 1./size[1]
    x = (box[0] + box[1])/2.0
    y = (box[2] + box[3])/2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x,y,w,h)

In [None]:
path = 'COCO/images'
labels_path = 'COCO/labels'
ims = os.listdir(path)

ignored_ones = []

for img in ims:
    
    im_path = path+'/'+img
    im = mpl.imread(im_path)

    try:
        boxes, probs, points = mtcnn.detect(im, landmarks=True)
    except RuntimeError as e:
        print(f"Failed to detected image {im_path}")
        continue
        
    if boxes is not None:
        for box, prob in zip(boxes, probs):
            
            startX, startY, endX, endY = box.astype(int)
            color = (0,255,0) 
            cv2.putText(im, 
                        f'{prob:.1%}', 
                        (startX, startY - 10), 
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=.5, 
                        color=color,
                        thickness=2)
            cv2.rectangle(im, (startX, startY), (endX, endY), color, 2) 

        w= int(im.shape[0])
        h= int(im.shape[1])
        label = img.rstrip('.jpg')
        
        with open(rf'{labels_path}/{label}.txt', 'w') as f:
            for item in boxes:
                b = (startX, endX, startY, endY)
                bb = convert_to_darknet((w,h), b)
                box = ' '.join(item.astype(str))
                f.write(f"0 {box}\n")
        print(img)
        plt.imshow(im)
        plt.show()

    else:
        ignored_ones.append(im_path)

In [30]:
for i in os.listdir('COCO/images'):
  if 'COCO/images/'+i in ignored_ones:
    # print(i)
    os.remove('COCO/images/'+i)

In [31]:
for i in os.listdir('COCO/images'):
  if i[:-4]+'.txt'not in os.listdir('COCO/labels'):
    os.remove('COCO/images/'+i)

COPY FROM COCO TO DATA

In [34]:
annotations_path = 'COCO/labels'
images_path = 'COCO/images'

In [35]:
annotations_files = [something for something in os.listdir(annotations_path) if not os.path.isdir(annotations_path + something)]
images_files = [something for something in os.listdir(images_path) if not os.path.isdir(images_path + something)]

In [36]:
annotations_files.sort(key = lambda e: int(re.sub('[^0-9]', '', e)))
images_files.sort(key = lambda e: int(re.sub('[^0-9]', '', e)))


In [37]:
from sklearn.model_selection import train_test_split

In [38]:
images_train, images_vals, labels_train, labels_vals = train_test_split(images_files, annotations_files, test_size = 0.3)

In [39]:
for file_name in images_train:
    shutil.copy('COCO/images/'+file_name, 'train/images/'+file_name)

In [41]:
for file_name in labels_train:
    shutil.copy('COCO/labels/'+file_name, 'train/labels/'+file_name)

In [42]:
for file_name in images_vals:
    shutil.copy('COCO/images/'+file_name, 'valid/images/'+file_name)

In [43]:
for file_name in labels_vals:
    shutil.copy('COCO/labels/'+file_name, 'valid/labels/'+file_name)

# MODELLING


In [46]:
!git clone https://github.com/ultralytics/yolov5 # clone repo
!pip install -U -r yolov5/requirements.txt

fatal: destination path 'yolov5' already exists and is not an empty directory.
Collecting matplotlib>=3.2.2
  Downloading matplotlib-3.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (11.2 MB)
[K     |████████████████████████████████| 11.2 MB 5.1 MB/s 
Collecting numpy>=1.18.5
  Downloading numpy-1.21.4-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)
[K     |████████████████████████████████| 15.7 MB 36.7 MB/s 
Collecting opencv-python>=4.1.2
  Downloading opencv_python-4.5.4.58-cp37-cp37m-manylinux2014_x86_64.whl (60.3 MB)
[K     |████████████████████████████████| 60.3 MB 1.3 MB/s 
Collecting Pillow>=7.1.2
  Downloading Pillow-8.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 28.6 MB/s 
[?25hCollecting PyYAML>=5.3.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |███████████████████████████

In [1]:
!python yolov5/train.py --img 416 --batch 16 --epochs 30 \
        --data data.yaml --cfg yolov5l.yaml --weights yolov5l.pt \
        --name robo4_epoch3_s --adam

Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...
[34m[1mtrain: [0mweights=yolov5l.pt, cfg=yolov5l.yaml, data=data.yaml, hyp=yolov5/data/hyps/hyp.scratch.yaml, epochs=30, batch_size=16, imgsz=416, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, adam=True, sync_bn=False, workers=8, project=yolov5/runs/train, name=robo4_epoch3_s, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, patience=100, freeze=0, save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v6.0-96-g8df64a9 torch 1.10.0+cu111 CUDA:0 (Tesla P100-PCIE-16GB, 16281MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.1, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias

In [3]:
!cp yolov5/runs/train/robo4_epoch3_s/weights/best.pt trained_model/

In [4]:
!python3 yolov5/detect.py --weights trained_model/best.pt \
        --source test/images/ --img-size 416 \
        --conf-thres 0.25 --iou-thres 0.45 --device '0' \
        --hide-labels --hide-conf


[34m[1mdetect: [0mweights=['trained_model/best.pt'], source=test/images/, imgsz=[416, 416], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=0, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=yolov5/runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=True, hide_conf=True, half=False, dnn=False
YOLOv5 🚀 v6.0-96-g8df64a9 torch 1.10.0+cu111 CUDA:0 (Tesla P100-PCIE-16GB, 16281MiB)

Fusing layers... 
Model Summary: 367 layers, 46113663 parameters, 0 gradients, 107.8 GFLOPs
image 1/42 /content/test/images/maksssksksss0_png.rf.6b3aea9ed83f70b0a89280ea91bd3ad3.jpg: 416x416 2 bads, 1 good, Done. (0.015s)
image 2/42 /content/test/images/maksssksksss11_png.rf.bcafaa101f46543c8f9550a40e2224e8.jpg: 416x416 1 bad, 28 goods, Done. (0.015s)
image 3/42 /content/test/images/maksssksksss127_png.rf.f4dd78a3734a7e855efdb66f8fcbdb48.jpg: 416x416 1 good, Done. (0.01

In [5]:
!zip -r yolo_v5_18Nov.zip ../content/ 

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: ../content/train/images/maksssksksss135_png.rf.b2e8331c5d226ae8e4a4eba976a70aa6.jpg (deflated 1%)
  adding: ../content/train/images/maksssksksss433_png.rf.2524e87d9fa1a8323d60b8fd965c745f.jpg (deflated 1%)
  adding: ../content/train/images/maksssksksss763_png.rf.fc173e818c88ecf26134e1d79e1dc123.jpg (deflated 1%)
  adding: ../content/train/images/maksssksksss283_png.rf.d3efc5155b1d5ab060eace7df23c2f4b.jpg (deflated 1%)
  adding: ../content/train/images/maksssksksss488_png.rf.dbb75e52afa16c0aa6b70910823122ca.jpg (deflated 1%)
  adding: ../content/train/images/maksssksksss339_png.rf.1fca28ee4834b4ce7107f46ed5eb59e7.jpg (deflated 2%)
  adding: ../content/train/images/COCO_val2014_000000262334.jpg (deflated 1%)
  adding: ../content/train/images/maksssksksss440_png.rf.4d2650d771c4e3778eae8a131cbc0864.jpg (deflated 1%)
  adding: ../content/train/images/maksssksksss748_png.rf.d560186ea0eede2972b7c37be4aa66e1.jpg (deflat

In [None]:
!rm -rf yolo_new