# 1. Setup and Get Data

## 1.1 Install Dependencies and Setup

In [7]:
!pip install labelme tensorflow==2.10.0 tensorflow-gpu==2.10.0 opencv-python matplotlib albumentations



## 1.2 Collect Images Using OpenCV

In [8]:
import os # builds and navigates file paths
import time # time utilities, e.g. sleeping between captures
import uuid # creates universally unique identifiers, e.g. for unique image file names
import cv2 # OpenCV: camera access and image I/O for computer vision (author's note: does not work on Colab)

In [86]:
# How many webcam frames to capture in one collection run.
number_images = 20
# Folder (relative to the working directory) that receives the captured frames.
IMAGES_PATH = os.path.join('data', 'images')

In [87]:
# Capture `number_images` frames from the default webcam (device 0) and save each
# one as a uniquely named JPEG in IMAGES_PATH. Press 'q' in the preview window to
# stop early.
os.makedirs(IMAGES_PATH, exist_ok=True)  # make sure the output folder exists before writing
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError('Could not open the webcam (device 0)')
    for imgnum in range(number_images):
        print('Collection image {}'.format(imgnum))
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered; skip it rather than handing None to imwrite/imshow.
            print('Failed to read frame {}, skipping'.format(imgnum))
            continue
        imgname = os.path.join(IMAGES_PATH, f'{uuid.uuid1()}.jpg')
        if not cv2.imwrite(imgname, frame):
            # imwrite reports failure through its return value instead of raising.
            print('Failed to write {}'.format(imgname))
        cv2.imshow('frame', frame)
        time.sleep(0.5)  # pause between captures so the subject can change position

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even if an error occurred.
    cap.release()
    cv2.destroyAllWindows()

Collection image 0
Collection image 1
Collection image 2
Collection image 3
Collection image 4
Collection image 5
Collection image 6
Collection image 7
Collection image 8
Collection image 9
Collection image 10
Collection image 11
Collection image 12
Collection image 13
Collection image 14
Collection image 15
Collection image 16
Collection image 17
Collection image 18
Collection image 19


## 1.3 Annotate Images with LabelMe

This command should run the package 'labelme'. 

In the LabelMe window, select a different output folder so the labels are saved in the correct folder.
Also enable auto-save so the annotations are saved automatically.

In [89]:
# Launch the LabelMe annotation tool via a shell command.
!labelme

[INFO   ] __init__:get_config:70 - Loading config file from: C:\Users\FRANC\.labelmerc


# 2. Review Dataset and Build Image Loading Function

## 2.1 Import TF and Deps

In [9]:
import tensorflow as tf
import json # our labels are in json format
import numpy as np
from matplotlib import pyplot as plt

## 2.2 Limit GPU Memory Growth

In [10]:
# Turn on memory growth for every visible GPU so TensorFlow allocates GPU memory
# on demand, which helps avoid out-of-memory errors.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [11]:
# Show the GPUs that TensorFlow can see.
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

## 2.3 Load Image into TF Data Pipeline

In [44]:
# Build a dataset of image file paths. os.path.join keeps the glob pattern portable
# across operating systems (it yields the same backslash pattern on Windows), and
# shuffle=False keeps the file order deterministic.
image_names = tf.data.Dataset.list_files(os.path.join('Data', 'images', '*.jpg'), shuffle=False)

In [45]:
# Peek at the first file path in the dataset.
next(image_names.as_numpy_iterator())

b'Data\\images\\003d213c-3f5f-11ee-926d-5ce42a8c8f38.jpg'

In [46]:
# Print every file path in the dataset to check which images were picked up.
for element in image_names.as_numpy_iterator():
    print(element)

b'Data\\images\\003d213c-3f5f-11ee-926d-5ce42a8c8f38.jpg'
b'Data\\images\\008baf2e-3f5f-11ee-8e88-5ce42a8c8f38.jpg'
b'Data\\images\\00daeaf9-3f5f-11ee-a611-5ce42a8c8f38.jpg'
b'Data\\images\\012ab70e-3f5f-11ee-93b0-5ce42a8c8f38.jpg'
b'Data\\images\\01787553-3f5f-11ee-beb2-5ce42a8c8f38.jpg'
b'Data\\images\\01c6c943-3f5f-11ee-915d-5ce42a8c8f38.jpg'
b'Data\\images\\02147e2e-3f5f-11ee-a4af-5ce42a8c8f38.jpg'
b'Data\\images\\02635b40-3f5f-11ee-80a9-5ce42a8c8f38.jpg'
b'Data\\images\\02b22b02-3f5f-11ee-ac83-5ce42a8c8f38.jpg'
b'Data\\images\\03011142-3f5f-11ee-97d9-5ce42a8c8f38.jpg'
b'Data\\images\\034f723f-3f5f-11ee-81a9-5ce42a8c8f38.jpg'
b'Data\\images\\03a0c781-3f5f-11ee-8a9a-5ce42a8c8f38.jpg'
b'Data\\images\\03ef3058-3f5f-11ee-b4af-5ce42a8c8f38.jpg'
b'Data\\images\\043d7f58-3f5f-11ee-8ffc-5ce42a8c8f38.jpg'
b'Data\\images\\105e34ef-3f5f-11ee-a0c3-5ce42a8c8f38.jpg'
b'Data\\images\\10ae797a-3f5f-11ee-87af-5ce42a8c8f38.jpg'
b'Data\\images\\10fccffa-3f5f-11ee-bf3e-5ce42a8c8f38.jpg'
b'Data\\images

In [47]:
def load_image(x):
    """Read the JPEG file at path ``x`` and return it decoded as an image tensor."""
    return tf.io.decode_jpeg(tf.io.read_file(x))

In [54]:
# `images` is a tf.data dataset that yields one decoded image per element; it is not a
# list or a single stacked array, so it cannot be indexed directly (e.g. images[0]).
images = image_names.map(load_image)

In [56]:
# Peek at the first decoded image as a NumPy array.
next(images.as_numpy_iterator())

array([[[ 89,  90,  74],
        [ 89,  90,  74],
        [ 89,  90,  74],
        ...,
        [141, 143, 138],
        [145, 147, 142],
        [149, 151, 146]],

       [[ 86,  87,  71],
        [ 87,  88,  72],
        [ 89,  90,  74],
        ...,
        [141, 143, 138],
        [146, 148, 143],
        [150, 152, 147]],

       [[ 85,  86,  70],
        [ 87,  88,  72],
        [ 88,  89,  73],
        ...,
        [141, 146, 140],
        [144, 149, 143],
        [148, 153, 147]],

       ...,

       [[ 12,  15,   4],
        [ 14,  17,   6],
        [ 15,  18,   7],
        ...,
        [149, 151, 140],
        [150, 151, 143],
        [151, 152, 144]],

       [[ 12,  15,   4],
        [ 14,  17,   6],
        [ 14,  18,   4],
        ...,
        [148, 150, 139],
        [149, 151, 140],
        [150, 152, 141]],

       [[ 13,  16,   5],
        [ 15,  18,   7],
        [ 15,  19,   5],
        ...,
        [147, 149, 138],
        [148, 150, 139],
        [149, 151, 140]]

In [80]:
type(images) # a MapDataset (the result of Dataset.map), not a Python list or NumPy array

tensorflow.python.data.ops.dataset_ops.MapDataset

## 2.4 View Raw Images with Matplotlib

In [70]:
# Group the images into batches of 4 and expose them as an iterator of NumPy arrays.
image_generator = images.batch(4).as_numpy_iterator()

In [78]:
# Pull the next batch of images; re-run this cell to advance to the following batch.
plot_images = next(image_generator)

In [None]:
# Show the images of the current batch side by side, one subplot per image.
fig, ax = plt.subplots(ncols=4, figsize=(20, 20))  # 4 columns to match the batch size of 4
for idx, image in enumerate(plot_images):
    ax[idx].imshow(image)
plt.show()

# 3. Partition Unaugmented Data

## 3.1 Manually split data into train, test, and val

Train: 71 images <br>
Test: 21 images <br>
Val: 19 images

## 3.2 Move the Matching Labels

In [5]:
# Move each label JSON file from Data/labels into the labels folder of the split
# (train/test/val) that contains the image with the same base name.
for folder in ['train', 'test', 'val']:
    # Make sure the destination exists; os.replace does not create directories.
    os.makedirs(os.path.join('Data', folder, 'labels'), exist_ok=True)
    for file in os.listdir(os.path.join('Data', folder, 'images')):

        # splitext strips only the final extension, so names with extra dots still match.
        filename = os.path.splitext(file)[0] + '.json'
        existing_filepath = os.path.join('Data', 'labels', filename)
        if os.path.exists(existing_filepath):
            new_filepath = os.path.join('Data', folder, 'labels', filename)
            os.replace(existing_filepath, new_filepath)

# 4. Apply Image Augmentation on Images and Labels using Albumentations

In [10]:
import albumentations as alb