In [None]:
# pip install ipykernel

In [None]:
!pip uninstall labelImg tensorflow opencv-python matplotlib albumentations

In [None]:
import time
import os
import uuid
import cv2 as cv 

In [None]:
# Folder that receives the frames captured from the webcam.
IMAGES_PATH=os.path.join('data','images')
# Number of frames the capture loop grabs.
number_of_images = 30 

In [None]:
# Capture `number_of_images` webcam frames into IMAGES_PATH, roughly one every
# 0.5 s. Press 'q' in the preview window to stop early.
os.makedirs(IMAGES_PATH, exist_ok=True)  # cv.imwrite returns False instead of raising when the folder is missing

cap = cv.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError('Could not open webcam (device 0)')

    for imgnum in range(number_of_images):
        print('Collecting images{}'.format(imgnum))
        ret, frame = cap.read()
        if not ret:
            # A failed grab yields frame=None, which would crash imwrite/imshow.
            print('Failed to read frame {}, skipping'.format(imgnum))
            continue
        imgname = os.path.join(IMAGES_PATH, f'{str(uuid.uuid1())}.jpg')
        cv.imwrite(imgname, frame)
        cv.imshow('Frame', frame)
        time.sleep(0.5)

        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even after an error.
    cap.release()
    cv.destroyAllWindows()

### Annotate with LabelImg

In [None]:
!labelImg

In [None]:
import tensorflow as tf
import cv2 as cv
import json
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# Turn on memory growth for every GPU TensorFlow reports, so memory is
# requested as needed rather than configured once for the whole device.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu,True)

In [None]:
tf.config.list_physical_devices()

In [None]:
# Build the glob with os.path.join so it matches on every OS; the hard-coded
# backslash pattern only matched on Windows.
images = tf.data.Dataset.list_files(os.path.join('data','images','*.jpg'))

In [None]:
images.as_numpy_iterator().next()

In [None]:
def load_files(x):
    """Read the file at path `x` and return its contents decoded as a JPEG image tensor."""
    return tf.io.decode_jpeg(tf.io.read_file(x))

In [None]:
images = images.map(load_files)

In [None]:
images.as_numpy_iterator().next()

In [None]:
type(images)

In [None]:
image_gen = images.batch(4).as_numpy_iterator()

In [None]:
# Pull the next batch from the iterator and show its images side by side.
plt_images = image_gen.next()
fig,ax = plt.subplots(ncols= 4, figsize=(20,20))
idx = 0
while idx < len(plt_images):
    image = plt_images[idx]
    ax[idx].imshow(image)
    idx += 1
plt.show()

In [None]:
## Split Data Manually into train, test and val

In [None]:
# Move each annotation JSON out of data/labels into the labels folder of the
# partition (train/test/val) that holds the image with the same base name.
for folder in ['train','test','val']:
    labels_dir = os.path.join('data',folder,'labels')
    os.makedirs(labels_dir, exist_ok=True)  # os.replace fails if the destination folder is missing
    for file in os.listdir(os.path.join('data',folder,'images')):
        # splitext strips only the extension; split('.')[0] would truncate names containing dots.
        filename = os.path.splitext(file)[0]+'.json'
        existing_filepath = os.path.join('data','labels',filename)
        if os.path.exists(existing_filepath):
            newfile_path = os.path.join(labels_dir,filename)
            os.replace(existing_filepath,newfile_path)

In [None]:
img = cv.imread(os.path.join('data','train','images','1eba0ce5-4843-11f0-a20e-dbf8433c50c5.jpg'))
# cv.imread returns BGR while matplotlib expects RGB; convert for display only
# so `img` itself stays exactly as loaded.
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
img.shape

## Apply Albumentations

In [None]:
import albumentations as alb

In [None]:
# Augmentation pipeline: a random 450x450 crop, then random horizontal/vertical
# flips and random brightness/contrast, gamma and RGB-shift changes.
# Bounding boxes are passed in the 'albumentations' format and their labels
# travel through the `class_labels` field.
augmentor = alb.Compose([alb.RandomCrop(width=450,height=450),
                         alb.HorizontalFlip(p=0.5),
                         alb.RandomBrightnessContrast(p=0.2),
                         alb.RandomGamma(p=0.2),
                         alb.RGBShift(p=0.2),
                         alb.VerticalFlip(p=0.5)
                         ],
                         bbox_params=alb.BboxParams(format='albumentations',label_fields=['class_labels']))

In [None]:
# Load one training image together with its annotation file.
img = cv.imread(os.path.join('data','train','images','1eba0ce5-4843-11f0-a20e-dbf8433c50c5.jpg'))
with open(os.path.join('data','train','labels','1eba0ce5-4843-11f0-a20e-dbf8433c50c5.json'),'r') as f:
    label = json.load(f)
# cv.imread returns BGR while matplotlib expects RGB; convert for display only
# so `img` keeps the channel order the later cells were written against.
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))

In [None]:
label

The labels are stored in the **Create ML JSON annotation format** (used by Apple’s [Create ML](https://developer.apple.com/machine-learning/create-ml/)), which is different from formats like COCO or YOLO.

The sections below explain each field and how to convert it:

---

### 📦 Sample Annotation:

```json
{
  "image": "1eba0ce5-4843-11f0-a20e-dbf8433c50c5.jpg",
  "annotations": [
    {
      "label": "face",
      "coordinates": {
        "x": 251.99,
        "y": 235.82,
        "width": 202.0,
        "height": 290.0
      }
    }
  ]
}
```

---

### 🧠 What It Means:

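This annotation describes **1 bounding box** on the image `"1eba0ce5-4843-11f0-a20e-dbf8433c50c5.jpg"`. In the label file this object sits inside a top-level JSON list, which is why the code below reads it as `label[0]`.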

The **coordinates** use the following system:

| Key      | Meaning                                              |
| -------- | ---------------------------------------------------- |
| `x`      | **X-center** of the bounding box (horizontal center) |
| `y`      | **Y-center** of the bounding box (vertical center)   |
| `width`  | Width of the bounding box                            |
| `height` | Height of the bounding box                           |

---

### 🖼️ Visual Representation:

```
            width
   <------------------->
    ___________________
   |                   |   ↑
   |                   |   |
   |         •         |   | height
   |                   |   |
   |___________________|   ↓

   • = (x, y), the center of the box
```

---

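### ✅ Convert to `[x_min, y_min, x_max, y_max]` (Pascal VOC-style corners; the `albumentations` format is the same box divided by the image width and height — note that COCO instead uses `[x_min, y_min, width, height]`):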

```python
x_center = 251.99
y_center = 235.82
width = 202.0
height = 290.0

x_min = x_center - width / 2    # 251.99 - 101 = 150.99
y_min = y_center - height / 2   # 235.82 - 145 = 90.82
x_max = x_center + width / 2    # 251.99 + 101 = 352.99
y_max = y_center + height / 2   # 235.82 + 145 = 380.82
```

➡️ Final bounding box:

```python
[x_min, y_min, x_max, y_max] = [150.99, 90.82, 352.99, 380.82]
```

---

### 📝 Summary:

| Format Key | Description         | Example |
| ---------- | ------------------- | ------- |
| `x`        | Center X of the box | 251.99  |
| `y`        | Center Y of the box | 235.82  |
| `width`    | Box width           | 202.0   |
| `height`   | Box height          | 290.0   |

To use it with tools like Albumentations, you’ll likely need to convert it to `[x_min, y_min, x_max, y_max]`.

The next cells apply this conversion to the first annotation of the loaded label and then divide by the image width and height to obtain normalized coordinates.


In [None]:
label[0]['annotations'][0]['coordinates']

In [None]:
# Turn the centre/size box of the first annotation into corner form
# [x_min, y_min, x_max, y_max] (still in pixels).
box = label[0]['annotations'][0]['coordinates']
coords = [
    box['x'] - box['width'] / 2,
    box['y'] - box['height'] /2,
    box['x'] + box['width'] / 2,
    box['y'] + box['height'] /2,
]

In [None]:
coords

In [None]:
# Normalise the pixel corners to [0, 1] using the size of the loaded image
# instead of a hard-coded 640x480, so other camera resolutions also work.
# Run this cell once per conversion: it rescales `coords` in place.
img_h, img_w = img.shape[:2]
coords = list(np.divide(coords,[img_w,img_h,img_w,img_h])) # [width,height,width,height]

In [None]:
coords

In [None]:
augmented = augmentor(image=img,bboxes=[coords],class_labels=['face'])

In [None]:
# Scale the first augmented box by 450 on both axes (the crop size configured
# in the augmentor) and draw it on the augmented image.
aug_box = augmented['bboxes'][0]
top_left = tuple(np.multiply(aug_box[:2],[450,450]).astype(int))
bottom_right = tuple(np.multiply(aug_box[2:],[450,450]).astype(int))
cv.rectangle(augmented['image'], top_left, bottom_right, (255,0,0), 2)

plt.imshow(augmented['image'])

In [None]:
# Build the augmented dataset: for every image of each partition, generate
# AUGMENTATIONS_PER_IMAGE random augmentations and write each augmented image
# plus a JSON label ({'image', 'bbox', 'class'}) under aug_data/<partition>/.
AUGMENTATIONS_PER_IMAGE = 60

for partition in ['train','test','val']:
    aug_images_dir = os.path.join('aug_data',partition,'images')
    aug_labels_dir = os.path.join('aug_data',partition,'labels')
    # cv.imwrite fails silently and open() raises when the target folders are missing.
    os.makedirs(aug_images_dir, exist_ok=True)
    os.makedirs(aug_labels_dir, exist_ok=True)

    for image in os.listdir(os.path.join('data',partition,'images')):
        img = cv.imread(os.path.join('data',partition,'images',image))
        if img is None:
            # cv.imread returns None for files it cannot decode; nothing to augment.
            print(f'Skipping unreadable file: {image}')
            continue

        stem = os.path.splitext(image)[0]
        # Tiny placeholder box used when the image has no annotation file.
        coords = [0,0,0.00001,0.00001]

        label_path = os.path.join('data',partition,'labels',f'{stem}.json')
        has_label = os.path.exists(label_path)
        if has_label:
            with open(label_path,'r') as f:
                label = json.load(f)
            box = label[0]['annotations'][0]['coordinates']
            # Centre/size -> corners, then normalise by the real image size
            # rather than assuming every image is 640x480.
            img_h, img_w = img.shape[:2]
            coords = [box['x'] - box['width'] / 2,
                      box['y'] - box['height'] / 2,
                      box['x'] + box['width'] / 2,
                      box['y'] + box['height'] / 2]
            coords = list(np.divide(coords,[img_w,img_h,img_w,img_h]))

        try:
            for x in range(AUGMENTATIONS_PER_IMAGE):
                augmented = augmentor(image=img, bboxes=[coords], class_labels=['face'])
                cv.imwrite(os.path.join(aug_images_dir,f'{stem}.{x}.jpg'),augmented['image'])

                annotation = {'image': image}
                if has_label and len(augmented['bboxes']) > 0:
                    # Plain floats keep the box JSON-serialisable whatever
                    # numeric type the augmentor hands back.
                    annotation['bbox'] = [float(v) for v in augmented['bboxes'][0]]
                    annotation['class'] = 1
                else:
                    # No annotation, or the crop removed the box: negative sample.
                    annotation['bbox'] = [0,0,0,0]
                    annotation['class'] = 0

                with open(os.path.join(aug_labels_dir,f'{stem}.{x}.json'),'w') as f:
                    json.dump(annotation,f)
        except Exception as e:
            # Best effort: report which image failed and carry on with the next one.
            print(f'Augmentation failed for {image}: {e}')

In [None]:
# Training images: list in a fixed order (labels are paired by position later),
# decode, resize to 120x120 and divide by 255.
# os.path.join keeps the glob portable; the backslash pattern only matched on Windows.
train_images = tf.data.Dataset.list_files(os.path.join('aug_data','train','images','*.jpg'),shuffle=False)
train_images = train_images.map(load_files)
train_images = train_images.map(lambda x: tf.image.resize(x,(120,120)))
train_images = train_images.map(lambda x: x/255)

In [None]:
# Test images: list in a fixed order (labels are paired by position later),
# decode, resize to 120x120 and divide by 255.
# os.path.join keeps the glob portable; the backslash pattern only matched on Windows.
test_images = tf.data.Dataset.list_files(os.path.join('aug_data','test','images','*.jpg'),shuffle=False)
test_images = test_images.map(load_files)
test_images = test_images.map(lambda x: tf.image.resize(x,(120,120)))
test_images = test_images.map(lambda x: x/255)

In [None]:
# Validation images: list in a fixed order (labels are paired by position later),
# decode, resize to 120x120 and divide by 255.
# os.path.join keeps the glob portable; the backslash pattern only matched on Windows.
val_images = tf.data.Dataset.list_files(os.path.join('aug_data','val','images','*.jpg'),shuffle=False)
val_images = val_images.map(load_files)
val_images = val_images.map(lambda x: tf.image.resize(x,(120,120)))
val_images = val_images.map(lambda x: x/255)

In [None]:
def load_labels(label_path):
    """Load one label JSON file and return `([class], bbox)`.

    `label_path` must expose `.numpy()` returning the file path, as the value
    handed over by `tf.py_function` in `parse_label` does. The class comes from
    the file's 'class' key (wrapped in a one-element list) and the box from its
    'bbox' key.
    """
    path = label_path.numpy()
    with open(path, 'r', encoding="utf-8") as handle:
        record = json.load(handle)
    class_id = record['class']
    return [class_id], record['bbox']

In [None]:
def parse_label(x):
    """Map a label-file path to `(class_label, bbox)` tensors with fixed shapes.

    Runs `load_labels` through `tf.py_function`, requesting a uint8 class and a
    float16 box, then pins their shapes to [1] and [4].
    """
    outputs = tf.py_function(func=load_labels, inp=[x], Tout=[tf.uint8, tf.float16])
    class_label, bbox = outputs
    # The shapes are set explicitly here: one class value, four box coordinates.
    class_label.set_shape([1])
    bbox.set_shape([4])
    return class_label, bbox

In [None]:
# Training labels, listed in a fixed order so they line up with the images.
# os.path.join keeps the glob portable; the backslash pattern only matched on Windows.
train_labels = tf.data.Dataset.list_files(os.path.join('aug_data','train','labels','*.json'), shuffle=False)
train_labels = train_labels.map(parse_label)

In [None]:
# Test labels, listed in a fixed order so they line up with the images.
# os.path.join keeps the glob portable; the backslash pattern only matched on Windows.
test_labels = tf.data.Dataset.list_files(os.path.join('aug_data','test','labels','*.json'), shuffle=False)
test_labels = test_labels.map(parse_label)

In [None]:
# Validation labels, listed in a fixed order so they line up with the images.
# os.path.join keeps the glob portable; the backslash pattern only matched on Windows.
val_labels = tf.data.Dataset.list_files(os.path.join('aug_data','val','labels','*.json'), shuffle=False)
val_labels = val_labels.map(parse_label)

In [None]:
# Sanity-check the shapes of one batch. `train` is only created in a later
# cell, so zip a small preview here; otherwise this cell raises NameError on
# Restart & Run All.
preview = tf.data.Dataset.zip((train_images,train_labels)).batch(8)
for img, label in preview.take(1):
    print("Image shape:", img.shape)
    print("Label[0] shape:", label[0].shape)
    print("Label[1] shape:", label[1].shape)

In [None]:
# train_labels = tf.data.Dataset.list_files('aug_data\\train\\labels\\*.json', shuffle=False)
# train_labels = train_labels.map(lambda x: tf.py_function(load_labels,[x],[tf.uint8,tf.float16])) 

In [None]:
# test_labels = tf.data.Dataset.list_files('aug_data\\test\\labels\\*.json', shuffle=False)
# test_labels = test_labels.map(lambda x: tf.py_function(load_labels,[x],[tf.uint8,tf.float16]))

In [None]:
# val_labels = tf.data.Dataset.list_files('aug_data\\val\\labels\\*.json', shuffle=False)
# val_labels = val_labels.map(lambda x: tf.py_function(load_labels,[x],[tf.uint8,tf.float16]))

In [None]:
len(train_images),len(train_labels),len(test_images), len(test_labels), len(val_images), len(val_labels)

In [None]:
# Pair training images with labels by position, then shuffle (buffer 5000),
# batch by 8 and prefetch 4 batches.
train = (
    tf.data.Dataset.zip((train_images,train_labels))
    .shuffle(5000)
    .batch(8)
    .prefetch(4)
)

In [None]:
# Pair test images with labels by position, then shuffle (buffer 5000),
# batch by 8 and prefetch 4 batches.
test = (
    tf.data.Dataset.zip((test_images,test_labels))
    .shuffle(5000)
    .batch(8)
    .prefetch(4)
)

In [None]:
# Pair validation images with labels by position, then shuffle (buffer 5000),
# batch by 8 and prefetch 4 batches.
val = (
    tf.data.Dataset.zip((val_images,val_labels))
    .shuffle(5000)
    .batch(8)
    .prefetch(4)
)

In [None]:
data_samples = train.as_numpy_iterator()

In [None]:
res = data_samples.next()

---------------------------------------------------------------------------
error                                     Traceback (most recent call last)
Cell In[381], line 5
      3 sample_image = res[0][idx]
      4 sample_coords = res[1][1][idx]
----> 5 cv.rectangle(sample_image,
      6              tuple(np.multiply(sample_coords[:2],[120,120]).astype(int)),
      7              tuple(np.multiply(sample_coords[2:],[120,120]).astype(int)),
      8              (255,0,0),2
      9              )
     10 ax[idx].imshow(sample_image)

error: OpenCV(4.11.0) :-1: error: (-5:Bad argument) in function 'rectangle'
> Overload resolution failed:
>  - img marked as output argument, but provided NumPy array marked as readonly
>  - img marked as output argument, but provided NumPy array marked as readonly
>  - Expected Ptr<cv::UMat> for argument 'img'
>  - Expected Ptr<cv::UMat> for argument 'img'

The error you're encountering:

```
error: OpenCV(4.11.0) :-1: error: (-5:Bad argument) in function 'rectangle'
> img marked as output argument, but provided NumPy array marked as readonly
```

…means that OpenCV's `cv.rectangle()` draws directly into the image it is given (`sample_image`), but that array is **read-only** — here the batch came from `as_numpy_iterator()`, and the NumPy arrays it yielded are not writeable.

---

### ✅ Solution

You need to make a **writable copy** of the image before modifying it. You can do this with `.copy()`.

### ✅ Updated code:

```python
fig, ax = plt.subplots(ncols=4, figsize=(20, 20))

for idx in range(4):
    sample_image = res[0][idx].copy()  # Make a writable copy
    sample_coords = res[1][1][idx]
    
    # Convert normalized coordinates to image scale
    top_left = tuple(np.multiply(sample_coords[:2], [120, 120]).astype(int))
    bottom_right = tuple(np.multiply(sample_coords[2:], [120, 120]).astype(int))
    
    # Draw rectangle
    cv.rectangle(sample_image, top_left, bottom_right, (255, 0, 0), 2)
    
    # Show the image
    ax[idx].imshow(sample_image)
```

---

### 📝 Optional Improvements:

* Make sure `[120, 120]` matches the actual size of your image if not fixed.
* Consider adding `ax[idx].axis('off')` to hide axis labels.

Let me know if you want help scaling coordinates or displaying more images.


In [None]:
# Show the first four samples of the batch with their labelled boxes.
fig,ax=plt.subplots(ncols=4,figsize=(20,20))
for idx in range(4):
    # The batch arrays are read-only, so draw on a copy.
    sample_image = res[0][idx].copy()
    sample_coords = res[1][1][idx]
    # Box values are scaled by 120, the size the images were resized to.
    top_left = tuple(np.multiply(sample_coords[:2],[120,120]).astype(int))
    bottom_right = tuple(np.multiply(sample_coords[2:],[120,120]).astype(int))
    cv.rectangle(sample_image, top_left, bottom_right, (255,0,0), 2)
    ax[idx].imshow(sample_image)

## Deep Learning Model 

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Dense, GlobalMaxPooling2D
from tensorflow.keras.applications import VGG16

In [None]:
vgg = VGG16(include_top=False)

In [None]:
vgg.summary()

In [None]:
def build_model():
    """Build the two-headed face tracker on top of a VGG16 feature extractor.

    The model takes 120x120x3 inputs and returns two outputs: a single sigmoid
    score (classification head) and four sigmoid values (box regression head).
    """
    input_layer = Input(shape=(120,120,3))
    features = VGG16(include_top=False)(input_layer)

    # Classification head: global max pool -> 2048 relu -> 1 sigmoid.
    class_pooled = GlobalMaxPooling2D()(features)
    class_hidden = Dense(2048, activation='relu')(class_pooled)
    class_out = Dense(1, activation='sigmoid')(class_hidden)

    # Regression head: global max pool -> 2048 relu -> 4 sigmoid.
    box_pooled = GlobalMaxPooling2D()(features)
    box_hidden = Dense(2048, activation='relu')(box_pooled)
    box_out = Dense(4, activation='sigmoid')(box_hidden)

    return Model(inputs=input_layer, outputs=[class_out, box_out])

In [None]:
facetracker = build_model()

In [None]:
X,y = train.as_numpy_iterator().next()
X.shape

In [None]:
y

In [None]:
classes, coords= facetracker.predict(X)

In [None]:
classes, coords

In [None]:
# Number of batches in one pass over `train`.
batches_per_epoch = len(train)
# Chosen so that 1 + lr_decay * batches_per_epoch == 1/0.75.
# NOTE(review): presumably intended to bring the learning rate down to 75% of
# its start value after one epoch under an lr / (1 + decay * step) schedule - confirm.
lr_decay = (1./0.75 - 1)/batches_per_epoch

In [None]:
# Current Keras optimizers no longer honour the `decay` keyword (it is rejected
# or ignored depending on the version). InverseTimeDecay expresses the same
# lr / (1 + lr_decay * step) schedule explicitly.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.0001,
    decay_steps=1,
    decay_rate=lr_decay,
)
opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

In [None]:
def localization_loss(y_true, yhat):
    """Squared-error box loss: first-corner error plus width/height error.

    Both arguments are indexed as 2-D arrays whose columns are used as
    [x1, y1, x2, y2]. Returns the sum over the batch of the squared difference
    of the first two columns, plus the squared differences of the widths
    (col 2 - col 0) and heights (col 3 - col 1).
    """
    corner_err = tf.reduce_sum(tf.square(y_true[:,:2] - yhat[:,:2]))

    true_h = y_true[:,3] - y_true[:,1]
    true_w = y_true[:,2] - y_true[:,0]
    pred_h = yhat[:,3] - yhat[:,1]
    pred_w = yhat[:,2] - yhat[:,0]

    size_err = tf.reduce_sum(tf.square(true_w - pred_w) + tf.square(true_h - pred_h))
    return corner_err + size_err


In [None]:
classloss = tf.keras.losses.BinaryCrossentropy()
regressloss = localization_loss

In [None]:
localization_loss(y[1],coords)

In [None]:
classloss(y[0],classes)

In [None]:
regressloss(y[1],coords)

In [None]:
X, y = train.as_numpy_iterator().next()
tf.print("Type of y:", type(y))


In [None]:
type(y)

In [None]:
class FaceTracker(Model):
    """Keras `Model` wrapper that trains a two-output tracker with a custom loss.

    The wrapped network must return `(classes, coords)`. Training minimises
    `localization_loss + 0.5 * class_loss` using the optimizer and losses
    handed to `compile`.
    """

    def __init__(self, eyetracker, **kwargs):
        """Wrap `eyetracker`, the network returning `(classes, coords)`."""
        super().__init__(**kwargs)
        self.model = eyetracker

    def compile(self, opt, classloss, localizationloss, **kwargs):
        """Store the optimizer and the two loss callables used by the train/test steps."""
        super().compile(**kwargs)
        self.closs = classloss
        self.lloss = localizationloss
        self.opt = opt

    def _compute_losses(self, batch, training):
        """Run the wrapped model on `batch` and return (total, class, localization) losses."""
        X, y = batch
        classes, coords = self.model(X, training=training)

        # Labels are cast to float32 before being compared with the predictions.
        batch_classloss = self.closs(tf.cast(y[0], tf.float32), classes)
        batch_localizationloss = self.lloss(tf.cast(y[1], tf.float32), coords)
        total_loss = batch_localizationloss+0.5*batch_classloss
        return total_loss, batch_classloss, batch_localizationloss

    def train_step(self, batch, **kwargs):
        """One optimisation step; returns the three losses keyed for `fit` history."""
        with tf.GradientTape() as tape:
            total_loss, batch_classloss, batch_localizationloss = self._compute_losses(batch, training=True)

        # Differentiate outside the tape context so the gradient ops themselves
        # are not recorded.
        grad = tape.gradient(total_loss, self.model.trainable_variables)
        # Use the optimizer given to compile(), not whatever global `opt`
        # happens to exist in the notebook.
        self.opt.apply_gradients(zip(grad, self.model.trainable_variables))

        return {"total_loss":total_loss, "class_loss":batch_classloss, "regress_loss":batch_localizationloss}

    def test_step(self, batch, **kwargs):
        """Evaluate one batch without updating weights; same keys as `train_step`."""
        total_loss, batch_classloss, batch_localizationloss = self._compute_losses(batch, training=False)
        return {"total_loss":total_loss, "class_loss":batch_classloss, "regress_loss":batch_localizationloss}

    def call(self, X, **kwargs):
        """Forward `X` (and any keyword arguments) to the wrapped model."""
        return self.model(X, **kwargs)

In [None]:
# class FaceTracker(Model):
#     def __init__(self,eyetracker,**kwargs):
#         super().__init__(**kwargs)
#         self.model = eyetracker
    
#     def complie(self,opt,classloss,regressloss,**kwargs):
#         super().compile(**kwargs)
#         self.closs = classloss
#         self.lloss = regressloss
#         self.opt = opt

#     def train_step(self, batch, **kwargs):
#         X,y = batch
#         with tf.GradientTape() as tape:
#             classes , coords = self.model(X,training = True)
#             batch_classloss = self.closs(y[0],classes)
#             batch_localizationloss = self.lloss(tf.cast(y[1],tf.float32),coords)
#             total_loss = batch_localizationloss + 0.5 * batch_classloss

#             grad = tape.gradient(total_loss,self.model.trainable_variables)
#         opt.apply_gradients(zip(grad,self.model.trainable_variables))
#         return {"Total Loss":total_loss,"class loss":batch_classloss,"regress loss":batch_localizationloss}
    
#     def test_step(self, batch, **kwargs):
#         X,y = batch
#         classes, coords = self.model(X, training = False)
#         batch_classloss = self.closs(y[0],classes)
#         batch_localizationloss = self.lloss(tf.cast(y[1],tf.float32),coords)
#         total_loss = batch_localizationloss + 0.5* batch_classloss

#         return {"Total Loss":total_loss,"class loss":batch_classloss,"regress loss":batch_localizationloss}
    
#     def call(self,X,**kwargs):
#         return self.model(X,**kwargs)


In [None]:
model = FaceTracker(facetracker)

In [None]:
model.compile(opt,classloss,regressloss)

In [None]:
logdir = 'logs'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [None]:
for x, y in train.take(1):
    print("X shape:", x.shape)
    print("y[0] shape:", y[0].shape)
    print("y[1] shape:", y[1].shape)

In [None]:
hist = model.fit(train, epochs=40, validation_data=val, callbacks=[tensorboard_callback])

In [None]:
hist.history

In [None]:
# Plot training (teal) and validation (orange) curves for the three tracked losses.
fig,ax= plt.subplots(ncols=3,figsize=(20,5))

# (history key, legend label, panel title) for each subplot, left to right.
panels = [
    ('total_loss', 'loss', 'Loss'),
    ('class_loss', 'class loss', 'Class Loss'),
    ('regress_loss', 'regress loss', 'Regress Loss'),
]
for axis, (key, legend_label, title) in zip(ax, panels):
    axis.plot(hist.history[key],color='teal',label=legend_label)
    axis.plot(hist.history['val_' + key],color='orange',label='val ' + legend_label)
    axis.title.set_text(title)
    axis.legend()

plt.show()

In [None]:
test_data = test.as_numpy_iterator()

In [None]:
# Predict on the next test batch and, for the first four images, draw the
# predicted box whenever the predicted score is above 0.5.
test_sample = test_data.next()
yhat = facetracker.predict(test_sample[0])
fig, ax = plt.subplots(ncols=4,figsize=(20,20))
for idx in range(4):
    sample_image = test_sample[0][idx].copy()
    sample_coords = yhat[1][idx]

    face_detected = yhat[0][idx] > 0.5
    if face_detected:
        top_left = tuple(np.multiply(sample_coords[:2],[120,120]).astype(int))
        bottom_right = tuple(np.multiply(sample_coords[2:],[120,120]).astype(int))
        cv.rectangle(sample_image, top_left, bottom_right, (255,0,0), 2)
    ax[idx].imshow(sample_image)

In [None]:
from tensorflow.keras.models import load_model
facetracker.save('FaceTracker.h5')

In [None]:
facetracker = load_model('FaceTracker.h5')

In [None]:
# Live demo: run the tracker on webcam frames and draw the predicted box until
# 'q' is pressed in the preview window.
cap = cv.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # A failed read yields frame=None; stop instead of crashing on the slice below.
            break
        frame = frame[50:500,50:500,:]
        # Slicing is clamped at the frame border, so the crop can be smaller
        # than 450x450 (e.g. 430 rows on a 640x480 camera). Scale predictions
        # by the crop's real size instead of assuming 450.
        frame_h, frame_w = frame.shape[:2]

        rgb = cv.cvtColor(frame,cv.COLOR_BGR2RGB)
        resized = tf.image.resize(rgb,(120,120))
        # verbose=0 keeps predict() from printing a progress bar for every frame.
        yhat = facetracker.predict(np.expand_dims(resized/255,0), verbose=0)
        sample_coords = yhat[1][0]

        if yhat[0][0][0] > 0.5:
            top_left = tuple(int(v) for v in np.multiply(sample_coords[:2],[frame_w,frame_h]))
            bottom_right = tuple(int(v) for v in np.multiply(sample_coords[2:],[frame_w,frame_h]))
            cv.rectangle(frame, top_left, bottom_right, (0,255,0), 2)

            # Caption placed 5 px above the box's top-left corner.
            cv.putText(frame,
                       'FACE',
                       (top_left[0], top_left[1] - 5),
                       cv.FONT_HERSHEY_SIMPLEX,
                       1,(255,255,255),2,cv.LINE_AA
                       )

        cv.imshow('FaceTracker',frame)
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame fails mid-loop.
    cap.release()
    cv.destroyAllWindows()
        