# 1. Import Dependencies

In [1]:
!pip install opencv-python



In [5]:
# Import opencv
import cv2 

# Import uuid
import uuid

# Import Operating System
import os

# Import time
import time

# 2. Define Images to Collect

In [6]:
# Class names to collect; later cells create one image sub-folder per entry.
labels = [
    'thumbsup',
    'thumbsdown',
]

# Number of images per class.
number_imgs = 6

# 3. Setup Folders 

In [7]:
# Root folder of the collected images (relative to the notebook's working
# directory); the per-label sub-folders are created beneath it.
IMAGES_PATH = os.path.join(
    'Tensorflow',
    'workspace',
    'images',
    'collectedimages',
)

In [8]:
# Create the image root folder and one sub-folder per label.
#
# os.makedirs is used instead of shelling out to `mkdir`: it creates missing
# parent folders, behaves the same on every operating system (no os.name
# branching), is safe for paths containing spaces, and with exist_ok=True the
# cell can be re-run without errors.
os.makedirs(IMAGES_PATH, exist_ok=True)
for label in labels:
    path = os.path.join(IMAGES_PATH, label)
    os.makedirs(path, exist_ok=True)

In [27]:
# Walk every label folder and make sure each file name contains its label:
# files whose name lacks the label are renamed to "<label>_<original name>".
# The name as listed (i.e. before any rename) is printed for every file.
for label in labels:
    path = os.path.join(IMAGES_PATH, label)
    for img_name in os.listdir(path):
        needs_prefix = img_name.find(label) == -1
        if needs_prefix:
            print("if ->  ", img_name, label)
            new_name = f"{label}_{img_name}"
            source = os.path.join(path, img_name)
            target = os.path.join(path, new_name)
            os.rename(source, target)
        print(img_name)

thumbsup_20230430_225824.jpg
thumbsup_20230430_225847_mfnr.jpg
thumbsup_20230430_230125_mfnr.jpg
thumbsup_20230430_230237_4.jpg
thumbsup_20230430_230541_mfnr.jpg
thumbsup_20230430_230557_1.jpg
thumbsdown_20230430_225824 (1).jpg
thumbsdown_20230430_225847_mfnr (1).jpg
thumbsdown_20230430_230443_mfnr (1).jpg
thumbsdown_20230430_230452_mfnr (1).jpg
thumbsdown_20230430_230737_mfnr.jpg
thumbsdown_20230430_230750_mfnr.jpg


# 4. Capture Images

The required images are placed in their respective label folders under `Tensorflow/workspace/images/collectedimages`.

# 5. Image Labelling

In [29]:
# Install / upgrade PyQt5 and lxml (presumably the dependencies of labelImg,
# which is set up below). This goes through the shell's `pip` because the
# later cells also invoke `pyrcc5` and `python labelImg.py` through the shell.
!pip install --upgrade pyqt5 lxml



In [30]:
# Record the packages and versions visible to the shell's `pip`.
!pip list

Package            Version
------------------ --------
asttokens          2.2.1
backcall           0.2.0
colorama           0.4.6
comm               0.1.3
debugpy            1.6.7
decorator          5.1.1
executing          1.2.0
importlib-metadata 6.6.0
ipykernel          6.22.0
ipython            8.13.1
jedi               0.18.2
jupyter_client     8.2.0
jupyter_core       5.3.0
lxml               4.9.2
matplotlib-inline  0.1.6
nest-asyncio       1.5.6
numpy              1.24.3
opencv-python      4.7.0.72
packaging          23.1
parso              0.8.3
pickleshare        0.7.5
pip                23.1.2
platformdirs       3.5.0
prompt-toolkit     3.0.38
psutil             5.9.5
pure-eval          0.2.2
Pygments           2.15.1
PyQt5              5.15.9
PyQt5-Qt5          5.15.2
PyQt5-sip          12.12.1
python-dateutil    2.8.2
pywin32            306
pyzmq              25.0.2
setuptools         58.1.0
six                1.16.0
stack-data         0.6.2
tornado            6.3.1
traitl

In [31]:
# Folder (relative to the notebook's working directory) that holds the
# labelImg checkout.
LABELIMG_PATH = os.path.join(
    'Tensorflow',
    'labelimg',
)

In [33]:
if not os.path.exists(LABELIMG_PATH):
    !mkdir {LABELIMG_PATH}
    !git clone https://github.com/tzutalin/labelImg {LABELIMG_PATH}

In [34]:
if os.name == 'posix':
    !make qt5py3
if os.name =='nt':
    !cd {LABELIMG_PATH} && pyrcc5 -o libs/resources.py resources.qrc

In [36]:
# Launch labelImg from inside its checkout, using the `python` found on the
# shell's PATH.
!cd {LABELIMG_PATH} && python labelImg.py

# 6. Move the Annotated Images into Training and Testing Partitions

In [40]:
# Destinations for the train/test split and for the optional archive; all
# three live in the shared images folder.
_IMAGES_ROOT = os.path.join('Tensorflow', 'workspace', 'images')

TRAIN_PATH = os.path.join(_IMAGES_ROOT, 'train')
TEST_PATH = os.path.join(_IMAGES_ROOT, 'test')
ARCHIVE_PATH = os.path.join(_IMAGES_ROOT, 'archive.tar.gz')

In [41]:
# Automate the train/test split.
#
# For every label, each annotation (.xml) in the label's folder is moved
# together with its image (.jpg) into either TEST_PATH (the first
# floor(n * TEST_SIZE) files) or TRAIN_PATH (the rest).
from glob import glob
import shutil
import math

# Fraction of each label's annotated images that goes to the test set.
TEST_SIZE = 0.2

# os.makedirs instead of shell `mkdir`: creates missing parents, works the
# same on every OS, and is a no-op when the folder already exists.
os.makedirs(TRAIN_PATH, exist_ok=True)
os.makedirs(TEST_PATH, exist_ok=True)

print('using {} % of images as test data'.format(TEST_SIZE * 100))
for label in labels:
    path = os.path.join(IMAGES_PATH, label)
    # Sorted so the same files end up in the test split on every run;
    # glob() itself returns files in arbitrary, OS-dependent order.
    xml_filenames = sorted(glob(os.path.join(path, '*.xml')))
    n_files = len(xml_filenames)
    n_test = math.floor(n_files * TEST_SIZE)
    for i, xml in enumerate(xml_filenames):
        # Swap only the extension; str.replace would also rewrite any other
        # '.xml' occurring earlier in the path.
        jpg = os.path.splitext(xml)[0] + '.jpg'
        # Check the pair is complete BEFORE moving anything, so a missing
        # image never leaves an orphaned annotation in the destination.
        if not os.path.exists(jpg):
            raise FileNotFoundError(
                'no image found for annotation {} (expected {})'.format(xml, jpg))
        if i >= n_test:
            dest_folder = TRAIN_PATH
        else:
            dest_folder = TEST_PATH
        shutil.move(xml, dest_folder)
        shutil.move(jpg, dest_folder)
    print('{} -> annotated_images: {} train: {} test: {}'.format(label, n_files, n_files - n_test, n_test))

using 20.0 % of images as test data
thumbsup -> annotated_images: 6 train: 5 test: 1
thumbsdown -> annotated_images: 6 train: 5 test: 1


# OPTIONAL - 7. Compress the Train and Test Folders for Colab Training

In [21]:
# Pack the train and test folders into one gzip-compressed tarball at
# ARCHIVE_PATH (uses the `tar` executable found on the shell's PATH).
!tar -czf {ARCHIVE_PATH} {TRAIN_PATH} {TEST_PATH}