# 1. Import Dependencies

In [1]:
# Import opencv
import cv2 

# Import uuid
import uuid

# Import Operating System
import os

# Import time
import time

# 2. Define Images to Collect

In [2]:
# Gesture classes to collect; each one gets its own sub-folder of images.
labels = [
    'thumbsup',
    'thumbsdown',
    'jutsu',
]
# Number of webcam frames captured per gesture class.
number_imgs = 20

# 3. Setup Folders 

In [3]:
# Root folder that holds one sub-directory of collected images per label.
IMAGES_PATH = os.path.join(
    'Tensorflow',
    'workspace',
    'images',
    'collectedimages',
)

In [4]:
# Create the collection root plus one sub-folder per label.
# os.makedirs also builds any missing parent folders (os.mkdir would raise
# FileNotFoundError on a fresh checkout), and exist_ok=True keeps the cell
# safe to re-run.
os.makedirs(IMAGES_PATH, exist_ok=True)

for label in labels:
    path = os.path.join(IMAGES_PATH, label)
    os.makedirs(path, exist_ok=True)

# 4. Capture Images

In [None]:
# Capture `number_imgs` webcam frames for every label and store each one as
# <IMAGES_PATH>/<label>/<label>.<uuid>.jpg. Press 'q' in the preview window
# to stop the whole collection early.
stop_requested = False
try:
    for label in labels:
        # A capture handle is opened per label; the try/finally guarantees it
        # is released before the next one is opened (previously only the last
        # handle was ever released).
        cap = cv2.VideoCapture(0)
        try:
            print('Collecting images for {}'.format(label))
            # Give the user time to get into position for this gesture.
            time.sleep(3)
            for imgnum in range(number_imgs):
                print('Collecting image {}'.format(imgnum))
                ret, frame = cap.read()
                if not ret:
                    # No frame was delivered; writing/showing it would raise.
                    print('Could not read a frame from the webcam, skipping image {}'.format(imgnum))
                    continue
                imgname = os.path.join(IMAGES_PATH, label, label + '.' + '{}.jpg'.format(str(uuid.uuid1())))
                cv2.imwrite(imgname, frame)
                cv2.imshow('frame', frame)
                time.sleep(1)
                # waitKey pumps the GUI event loop so the preview window is
                # actually drawn, and lets the user abort with 'q'.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    stop_requested = True
                    break
        finally:
            cap.release()
        if stop_requested:
            break
finally:
    # Close the preview window even when the loop ends with an exception.
    cv2.destroyAllWindows()

In [13]:
# # If an error occurred and the webcam did not turn off, uncomment and run:
# cap.release()
# cv2.destroyAllWindows()

In [19]:
# Print how many directory entries each label folder currently holds.
for label in labels:
    label_folder = os.path.join(IMAGES_PATH, label)
    entry_count = len(os.listdir(label_folder))
    print(f'Label : {label} | {entry_count}')

Label : thumbsup | 21
Label : thumbsdown | 20
Label : jutsu | 20


# 5. Image Labelling

In [None]:
!pip install --upgrade pyqt5 lxml

In [20]:
# Folder the labelImg repository is cloned into.
LABELIMG_PATH = os.path.join(
    'Tensorflow',
    'labelimg',
)

In [21]:
if not os.path.exists(LABELIMG_PATH):
    !mkdir {LABELIMG_PATH}
    !git clone https://github.com/tzutalin/labelImg {LABELIMG_PATH}

In [22]:
if os.name == 'posix':
    !make qt5py3
if os.name =='nt':
    !cd {LABELIMG_PATH} && pyrcc5 -o libs/resources.py resources.qrc

In [None]:
# A Python GUI will open next; use that GUI to extract the features (label the images).
!cd {LABELIMG_PATH} && python labelImg.py

# 6. Move them into a Training and Testing Partition

In [27]:
# All split artefacts live next to each other under the shared images folder.
_images_root = ('Tensorflow', 'workspace', 'images')
TRAIN_PATH = os.path.join(*_images_root, 'train')
TEST_PATH = os.path.join(*_images_root, 'test')
ARCHIVE_PATH = os.path.join(*_images_root, 'archive.tar.gz')

In [28]:
# Make sure both split folders exist. os.makedirs also creates missing parent
# folders, and exist_ok=True makes re-running the cell harmless.
for split_dir in (TRAIN_PATH, TEST_PATH):
    os.makedirs(split_dir, exist_ok=True)

In [181]:
import numpy as np 
import re
import shutil

np.random.seed(101)
# Move a random share of every class folder into the test folder and the
# remainder into the train folder.
test_size = .2
# Captures the stem shared by an image and its annotation, e.g.
# 'thumbsup.<uuid>' from 'thumbsup.<uuid>.jpg' or 'thumbsup.<uuid>.xml'.
# (?:jpg|xml) is a real alternation; the former [jpg|xml] was a character class.
pattern1 = r'(.*)\.(?:jpg|xml)$'

# Sorted so the seeded shuffle is applied to the labels in a stable order.
for label_dir in sorted(os.listdir(IMAGES_PATH)):
    label_dir_path = os.path.join(IMAGES_PATH, label_dir)
    if not os.path.isdir(label_dir_path):
        # Stray files next to the label folders are not part of the data set.
        continue

    file_names = os.listdir(label_dir_path)
    num_files = len(file_names)
    print(label_dir, num_files)

    if num_files == 0:
        print(f'{label_dir} is moved, continue')
        continue

    # Each photo comes as two files (jpg and xml), so collect the unique stems.
    stem_by_file = {}
    for data in file_names:
        searched = re.search(pattern1, data)
        if searched is None:
            # Neither an image nor an annotation: leave it where it is.
            print(f'Skipping unexpected file: {os.path.join(label_dir_path, data)}')
            continue
        stem_by_file[data] = searched.group(1)

    # Sort before shuffling: set/dict ordering of strings is not stable across
    # interpreter runs, which would defeat the fixed random seed.
    # dtype=str replaces the removed np.str alias.
    data_name_temp = np.array(sorted(set(stem_by_file.values())), dtype=str)

    # Shuffle the samples of this label.
    np.random.shuffle(data_name_temp)

    # The first `test_size` share of the shuffled stems becomes the test set.
    break_point = int(len(data_name_temp) * test_size)
    test_candidate = data_name_temp[:break_point]
    test_stems = set(test_candidate.tolist())

    # Decide the destination once per file. The previous nested loop moved
    # every non-matching file to train while handling the first test
    # candidate, so all later candidates were already gone and the test set
    # ended up with a single sample per label.
    for train_test_data, stem in stem_by_file.items():
        source_path = os.path.join(label_dir_path, train_test_data)
        if stem in test_stems:
            shutil.move(source_path, TEST_PATH)
        else:
            shutil.move(source_path, TRAIN_PATH)

jutsu 0
jutsu is moved, continue
thumbsdown 0
thumbsdown is moved, continue
thumbsup 0
thumbsup is moved, continue


# (OPSIONAL) 7. File tar jika mau menyimpan data dalam bentuk TAR dan diakses dari Google Colab

In [179]:
import tarfile

def make_tarfile(output_filename, source_dir):
    """Write a gzip-compressed tar archive from one or more source paths.

    Args:
        output_filename: Path of the archive to create; it is opened in
            ``"w:gz"`` mode.
        source_dir: An iterable of paths to add, or a single path given as a
            ``str`` / ``os.PathLike``. Every path is stored in the archive
            under its own base name.
    """
    # A lone path would otherwise be iterated character by character.
    if isinstance(source_dir, (str, os.PathLike)):
        source_dir = [source_dir]

    with tarfile.open(output_filename, "w:gz") as tar:
        for source in source_dir:
            source = os.fspath(source)
            # normpath drops a trailing separator, which would make basename
            # return '' and store the entry without a name.
            tar.add(source, arcname=os.path.basename(os.path.normpath(source)))

In [180]:
# Bundle the train and test folders into one gzip-compressed archive.
make_tarfile(
    output_filename=ARCHIVE_PATH,
    source_dir=[TRAIN_PATH, TEST_PATH],
)