# 1. Import Dependencies

In [1]:
import cv2 
import uuid
import os
import time

# 2. Define Image Labels

In [2]:
# Root folder holding one sub-folder of collected images per class label.
# Built with os.path.join so the separator matches the current OS; on Windows
# this yields the same '.\Tensorflow\workspace\images\collectedimages' string.
COLLECTED_IMG_PATH = os.path.join('.', 'Tensorflow', 'workspace', 'images', 'collectedimages')

# Each sub-folder name is a label. Stray files (e.g. desktop.ini, .DS_Store) are
# ignored, and the result is sorted because os.listdir returns entries in
# arbitrary order.
labels = sorted(
    entry
    for entry in os.listdir(COLLECTED_IMG_PATH)
    if os.path.isdir(os.path.join(COLLECTED_IMG_PATH, entry))
)
labels

['botolkaca',
 'botolplastik',
 'kaleng',
 'kardus',
 'karet',
 'kertas',
 'plastik',
 'sedotan']

# 3. Setup Folders 

In [3]:
# Folder that receives one sub-folder of collected images per label.
IMAGES_PATH = os.path.join(*[
    'Tensorflow',
    'workspace',
    'images',
    'collectedimages',
])

In [4]:
# Create the image root, then one sub-folder per label.
# os.makedirs also creates missing parent folders and behaves the same on every
# OS, whereas the shell `mkdir` fails on POSIX when a parent is missing.
if not os.path.exists(IMAGES_PATH):
    os.makedirs(IMAGES_PATH)

for label in labels:
    path = os.path.join(IMAGES_PATH, label)
    # Existing entries are left untouched, so re-running the cell is harmless.
    if not os.path.exists(path):
        os.makedirs(path)

# 4. Image Labelling

In [5]:
# Optional one-time setup: uncomment the next line to install/upgrade PyQt5 and lxml
# (presumably the packages the labelImg GUI used below depends on; confirm against its README).
# !pip install --upgrade pyqt5 lxml



In [6]:
# Local folder into which the labelImg repository is cloned.
LABELIMG_PATH = os.path.join(*('Tensorflow', 'labelimg'))

In [7]:
if not os.path.exists(LABELIMG_PATH):
    !mkdir {LABELIMG_PATH}
    !git clone https://github.com/tzutalin/labelImg {LABELIMG_PATH}

In [8]:
if os.name == 'posix':
    !make qt5py3
if os.name =='nt':
    !cd {LABELIMG_PATH} && pyrcc5 -o libs/resources.py resources.qrc

In [9]:
# A Python GUI will open; use that GUI to extract the features
# (presumably: draw and save the annotations for the collected images).
!cd {LABELIMG_PATH} && python labelImg.py

# 5. Copy the Labelled Images into Training and Testing Partitions

Before running this section, delete the contents of the train and test directories so that files from an earlier split do not mix with the new one.

In [1]:
# Destinations for the two dataset splits and for the optional archive; all of
# them live under the shared images folder.
_images_root = os.path.join('Tensorflow', 'workspace', 'images')
TRAIN_PATH = os.path.join(_images_root, 'train')
TEST_PATH = os.path.join(_images_root, 'test')
ARCHIVE_PATH = os.path.join(_images_root, 'archive.tar.gz')

In [2]:
# Make sure both split folders exist before anything is copied into them.
for split_dir in (TRAIN_PATH, TEST_PATH):
    if os.path.exists(split_dir):
        continue
    os.mkdir(split_dir)

## 5.1 Hasea

In [12]:
# Shuffle each label's annotated images and copy them into the train and test folders
import random
import shutil

TRAIN_TO_TOTAL_RATIO = 0.7
RANDOM_SEED = 101
# Extensions accepted as the image belonging to an annotation file (case-insensitive)
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _annotated_pairs(directory):
    """Return the (xml_name, image_name) pairs found in `directory`, ordered by name.

    An annotation is paired with the image that shares its file stem. Annotations
    without an image are reported and left out, so the copy step cannot fail
    half-way and leave an orphaned XML file in a split folder.
    """
    images = {}
    annotations = []
    # Sorted listing: os.listdir order is arbitrary, and the seeded shuffle is
    # only reproducible when it always starts from the same ordering.
    for name in sorted(os.listdir(directory)):
        stem, extension = os.path.splitext(name)
        extension = extension.lower()
        if extension == '.xml':
            annotations.append((stem, name))
        elif extension in IMAGE_EXTENSIONS:
            images.setdefault(stem, name)

    pairs = []
    for stem, xml_name in annotations:
        image_name = images.get(stem)
        if image_name is None:
            print(f"Skipped {os.path.join(directory, xml_name)}: no matching image")
        else:
            pairs.append((xml_name, image_name))
    return pairs


def _copy_pairs(pairs, source_dir, target_dir):
    """Copy every annotation/image pair from `source_dir` into `target_dir`."""
    os.makedirs(target_dir, exist_ok=True)
    for pair in pairs:
        for name in pair:
            shutil.copyfile(os.path.join(source_dir, name), os.path.join(target_dir, name))


random.seed(RANDOM_SEED)

for file_folder_name in labels:
    file_dir = os.path.join(IMAGES_PATH, file_folder_name)
    if not os.path.isdir(file_dir):
        # A stray file picked up as a "label" has no images to split.
        continue

    pairs = _annotated_pairs(file_dir)
    train_num = int(TRAIN_TO_TOTAL_RATIO * len(pairs))

    # After shuffling, the first `train_num` pairs form the training split and
    # the remainder the test split, so no pair can land in both.
    random.shuffle(pairs)
    _copy_pairs(pairs[:train_num], file_dir, TRAIN_PATH)
    _copy_pairs(pairs[train_num:], file_dir, TEST_PATH)

Check the sizes of the test and train sets (each count is the number of files, i.e. XML annotations plus images).

In [13]:
# Count the entries in each split folder (annotations and images alike) and report them.
train_count = len(os.listdir(TRAIN_PATH))
test_count = len(os.listdir(TEST_PATH))
print(f"Train data : {train_count} data")
print(f"Test data : {test_count} data")

Train data : 6530 data
Test data : 2804 data


# (OPTIONAL) 7. Create a TAR archive if you want to store the data as a TAR file and access it from Google Colab

In [None]:
import tarfile

def make_tarfile(output_filename, source_dir):
    """Write a gzip-compressed tar archive containing the given source path(s).

    Args:
        output_filename: Path of the ``.tar.gz`` file to create (overwritten if present).
        source_dir: Either a single path (``str`` or ``os.PathLike``) or an
            iterable of paths. Each one is added recursively and stored under
            its own base name at the root of the archive.

    Raises:
        FileNotFoundError: If one of the source paths does not exist.
    """
    # A lone path must be wrapped: iterating a string would otherwise try to
    # archive each of its characters as a separate path.
    if isinstance(source_dir, (str, os.PathLike)):
        source_dir = [source_dir]

    with tarfile.open(output_filename, "w:gz") as tar:
        for source in source_dir:
            # normpath drops a trailing separator, which would otherwise make
            # basename() return '' and flatten the folder into the archive root.
            source = os.path.normpath(os.fspath(source))
            tar.add(source, arcname=os.path.basename(source))

In [None]:
# Bundle the train and test folders into a single archive at ARCHIVE_PATH.
make_tarfile(
    output_filename=ARCHIVE_PATH,
    source_dir=[TRAIN_PATH, TEST_PATH],
)

In [None]:
# wokwow