In [2]:
# Mount Google Drive at /content/drive so the project files stored there can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Make the face-detection project folder on the mounted Drive the working directory.
%cd /content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection

/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection


In [4]:
from pathlib import Path
import os
import cv2
from shutil import copyfile,rmtree
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator,img_to_array,load_img
import numpy as np

In [5]:
# Folder holding the source face images; the part of each file name before the first dot
# is used further down as that image's class label.
images_folder = '/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/faces'

In [6]:
# List every image file in the folder.
#  - sorted(): os.listdir() returns entries in arbitrary order, and the class ids are later
#    derived from the position of each file in this list, so the order must be stable
#    from one run to the next.
#  - lower(): also match upper-case extensions such as ".JPG".
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
image_files = [
    os.path.join(images_folder, file)
    for file in sorted(os.listdir(images_folder))
    if file.lower().endswith(IMAGE_EXTENSIONS)
]

In [7]:
# Column layout shared by the annotation frames.
# NOTE(review): despite the *_min / *_max names, the cells that fill these columns store the
# normalised box centre in x_min / y_min and the normalised box width / height in
# x_max / y_max, which is also the order in which the label files are written.
header = ['image_path', 'x_min', 'y_min', 'x_max', 'y_max', 'class_label']

In [8]:
# One label per image: the file name up to (not including) its first dot.
labels = [os.path.basename(path).partition(".")[0] for path in image_files]

# Label -> numeric class id, i.e. the label's position in `labels`
# (a label that occurs more than once keeps its last position).
class_lab = dict(zip(labels, range(len(labels))))

In [9]:
# Recreate empty train/ and val/ trees, each with an images/ sub-folder, so every run starts clean.
# The old tree is removed only when it exists: on a first run there is nothing to delete and an
# unconditional rmtree() raises FileNotFoundError.
DATASETS_DIR = Path('/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/datasets')
for split in ('train', 'val'):
    split_dir = DATASETS_DIR / split
    if split_dir.exists():
        rmtree(split_dir)
    (split_dir / 'images').mkdir(parents=True, exist_ok=True)

In [10]:
# Create the labels/ folders for the train and val splits (no-op when they already exist).
for labels_dir in (
    '/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/datasets/train/labels',
    '/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/datasets/val/labels',
):
    Path(labels_dir).mkdir(parents=True, exist_ok=True)

In [11]:
# Build the annotation table `df` for the original images.
#
# For every source image this cell
#   1. resizes it to IMG_SIZE x IMG_SIZE and stores it as <name>.png in the faces folder,
#   2. runs the Haar frontal-face cascade on the grayscale version,
#   3. records the first detected box together with the image's label.
#
# Changes compared with the previous version of this cell:
#   * an image without a detected face is now skipped; before, the bare `except` fell through
#     and the previous image's row was appended again (or NameError was raised when the very
#     first image had no face);
#   * the resized PNG is written before the original file is removed, so a failed write no
#     longer loses the image;
#   * the cascade is loaded once, and the rows are collected in a list instead of
#     concatenating a one-row DataFrame per image;
#   * the background-masking step was removed because its result was never used.
IMG_SIZE = 640

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

records = []
for file, lb in zip(image_files, labels):

  image = cv2.imread(file)
  if image is None:
     print(f"Image not found: {file}")
     continue

  image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))

  # Every image is stored as PNG under the part of its name before the first dot.
  name = os.path.basename(file).split(".")[0]
  png_file = f"/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/faces/{name}.png"

  # Write first, delete afterwards; keep the source when it *is* the PNG just written.
  if not cv2.imwrite(png_file, image):
     print(f"Could not write resized image: {png_file}")
     continue
  if os.path.abspath(file) != os.path.abspath(png_file):
     os.remove(file)

  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

  if len(faces) == 0:
     print(f"this image is not proper {png_file}")
     continue

  # Only the first detection is used. The columns keep their historical names, but hold the
  # normalised box centre (x_min, y_min) and the normalised width / height (x_max, y_max).
  x, y, w, h = faces[0]
  records.append({
      'image_path': png_file,
      'x_min': (x + w / 2) / IMG_SIZE,
      'y_min': (y + h / 2) / IMG_SIZE,
      'x_max': w / IMG_SIZE,
      'y_max': h / IMG_SIZE,
      'class_label': lb,
  })

df = pd.DataFrame(records, columns=header)

In [12]:
# Validation images: copy every annotated original into datasets/val/images under its own file name.
val_images_dir = '/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/datasets/val/images'
for src_path in df['image_path']:
    copyfile(src_path, f'{val_images_dir}/{Path(src_path).name}')

In [13]:
# Validation labels: one text file per row of df, named after the image (name up to the first
# dot) and holding a single line "<class id> <x_min> <y_min> <x_max> <y_max>".
val_labels_dir = "/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/datasets/val/labels"
for _, row in df.iterrows():
    stem = os.path.basename(row["image_path"]).split(".")[0]
    with open(f"{val_labels_dir}/{stem}.txt", 'w') as label_file:
        class_id = class_lab[row['class_label']]
        label_file.write(f"{class_id} {row['x_min']} {row['y_min']} {row['x_max']} {row['y_max']}")

In [14]:
#train

# Augmentation settings.
# NOTE(review): per the Keras documentation brightness factors below 1.0 darken the image;
# (0.02, 0.3) looks very dark and may be why the detector finds no face in many of the
# augmented images. Confirm that this range is intended.
dataaug=ImageDataGenerator(
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode="nearest",
    brightness_range=(0.02,0.3),
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

AUG_DIR = "/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/agumented_img"
AUG_PER_IMAGE = 5  # the previous loop stopped after the fifth generated sample

# Start from an empty output folder. It is removed only when present: on a first run it does
# not exist yet and an unconditional rmtree() raises FileNotFoundError.
if os.path.isdir(AUG_DIR):
    rmtree(AUG_DIR)
Path(AUG_DIR).mkdir(parents=True, exist_ok=True)

# Iterate over the paths recorded in df rather than over image_files: the annotation cell
# rewrites every image as <name>.png and removes the original, so image_files can point at
# files that no longer exist. Using df also limits augmentation to images with a known label.
for file in df["image_path"].unique():
    if not os.path.exists(file):
        print(f"Image not found: {file}")
        continue

    # Add a leading batch axis of size 1 before handing the array to flow().
    image = img_to_array(load_img(file))
    image = image.reshape((1,) + image.shape)

    # Each generated sample is saved to AUG_DIR as a side effect of advancing the iterator;
    # the file names start with the source image's name (up to its first dot).
    fl = os.path.basename(file).split(".")[0]
    flow = dataaug.flow(image, batch_size=1, save_to_dir=AUG_DIR, save_prefix=f"{fl}", save_format="jpeg")
    for _ in range(AUG_PER_IMAGE):
        next(flow)

In [15]:
# Path to the folder containing the augmented images
images_folder_aug = '/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/agumented_img'

In [16]:
# Collect the full path of every .jpg / .jpeg / .png file in the augmented-image folder.
image_files_aug = []
for entry in os.listdir(images_folder_aug):
    if entry.endswith(('.jpg', '.jpeg', '.png')):
        image_files_aug.append(os.path.join(images_folder_aug, entry))


In [17]:
# Build the annotation table `df2` for the augmented images.
#
# Each augmented image inherits the class label of the original it was generated from and gets
# the first box found by the Haar frontal-face cascade. Augmented images in which no face is
# found are deleted from disk and left out of the table.
#
# Changes compared with the previous version of this cell:
#   * after deleting an image without a face the loop now moves on; before, the bare `except`
#     fell through and the previous image's row was appended again;
#   * the source name is recovered by stripping only the trailing "_<index>_<hash>" part of the
#     file name (e.g. "Alexander_0_9992"), so source names containing "_" are handled;
#   * an augmented image whose original is not in df is reported and skipped instead of
#     raising IndexError;
#   * the cascade is loaded once, labels are looked up through a dict, and the rows are
#     collected in a list instead of concatenating a one-row DataFrame per image;
#   * the background-masking step was removed because its result was never used.
IMG_SIZE = 640

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Class label of every annotated original image, keyed by its path.
label_by_path = dict(zip(df["image_path"], df["class_label"]))

records_aug = []
for file in image_files_aug:

  image = cv2.imread(file)
  if image is None:
     print(f"Image not found: {file}")
     continue

  # Path of the original this sample was generated from.
  fl = os.path.basename(file).split(".")[0].rsplit("_", 2)[0]
  cpth = f"/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/faces/{fl}.png"
  if cpth not in label_by_path:
     print(f"No annotated original found for {file}; skipped")
     continue
  cl = label_by_path[cpth]

  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

  if len(faces) == 0:
     print(f"Deleted as this image is not proper {file}")
     os.remove(file)
     continue

  # Only the first detection is used. The columns keep their historical names, but hold the
  # normalised box centre (x_min, y_min) and the normalised width / height (x_max, y_max).
  x, y, w, h = faces[0]
  records_aug.append({
      'image_path': file,
      'x_min': (x + w / 2) / IMG_SIZE,
      'y_min': (y + h / 2) / IMG_SIZE,
      'x_max': w / IMG_SIZE,
      'y_max': h / IMG_SIZE,
      'class_label': cl,
  })

df2 = pd.DataFrame(records_aug, columns=header)

Deleted as this image is not proper /content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/agumented_img/Alexander_0_9992.jpeg
Deleted as this image is not proper /content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/agumented_img/Daniel_0_9230.jpeg
Deleted as this image is not proper /content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/agumented_img/John_0_1424.jpeg
Deleted as this image is not proper /content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/agumented_img/John_0_563.jpeg
Deleted as this image is not proper /content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/agumented_img/Robert_0_9376.jpeg
Deleted as this image is not proper /content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/agumented_img/Robert_0_7667.jpeg
Deleted as this image is not proper /content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/agumented_img/Joseph_0_7827.jpeg
Deleted as this image is not proper /content/drive/MyDrive/Data_

In [18]:
# Training table: the rows for the original images (df) followed by the rows for the augmented
# images (df2), re-indexed from 0.
# NOTE(review): df is also what the validation split is built from, so the original images end
# up in both splits; confirm this overlap is intended.
train_df=pd.concat([df,df2],ignore_index=True)

In [19]:
# Training images: copy each image listed in train_df into datasets/train/images,
# keeping its file name.
train_images_dir = '/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/datasets/train/images'
for image_path in train_df['image_path']:
    target_path = os.path.join(train_images_dir, Path(image_path).name)
    copyfile(image_path, target_path)


In [20]:
# Training labels: for every row of train_df write "<class id> <x_min> <y_min> <x_max> <y_max>"
# to a text file named after the image (file name up to its first dot).
train_labels_dir = "/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/datasets/train/labels"
for _, record in train_df.iterrows():
    image_stem = os.path.basename(record["image_path"]).split(".")[0]
    with open(f"{train_labels_dir}/{image_stem}.txt", 'w') as out:
        fields = (
            class_lab[record['class_label']],
            record['x_min'],
            record['y_min'],
            record['x_max'],
            record['y_max'],
        )
        out.write(" ".join(str(field) for field in fields))

In [21]:
# Save the annotations of the original images as CSV.
# The csv_main folder is created first: unlike the dataset folders it is not created anywhere
# else in this notebook, and writing into a missing folder raises FileNotFoundError.
# NOTE(review): only df (original images) is exported, not train_df; confirm this is intended.
annotations_csv = Path('/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/csv_main/annotations.csv')
annotations_csv.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(annotations_csv)

In [22]:
# Text of the dataset description file: image folders of both splits, the number of classes
# and the class names (in the same order the numeric class ids were assigned).
yaml_lines = [
    "",
    "train: /content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/datasets/train/images",
    "val: /content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/datasets/val/images",
    "",
    f"nc: {len(labels)}  # Number of classes",
    f"names: {labels}  # Class names",
    "",
]
train_yaml = "\n".join(yaml_lines)

In [23]:
# Write the dataset description next to the train/ and val/ folders.
data_yaml_path = Path('/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/datasets/data.yaml')
with data_yaml_path.open('w') as yaml_file:
    yaml_file.write(train_yaml)

In [24]:
# from google.colab.patches import cv2_imshow


# # Load the pre-trained face cascade
# face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# # Load the image
# image = cv2.imread('/content/drive/MyDrive/Data_Science/ML-DL_projects/facedetection/datasets/train/images/shidhu_0_9153.jpeg')

# # Convert image to grayscale
# gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# # Threshold the image to create a binary mask of the background
# _, mask = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)

# # Invert the mask
# mask_inv = cv2.bitwise_not(mask)

# # Create a black background
# black_background = np.zeros_like(image, np.uint8)

# # Copy the original image onto the black background using the inverted mask
# masked_image = cv2.bitwise_and(image, image, mask=mask_inv)
# image = cv2.add(black_background, masked_image)

# # Detect faces in the black_background_with_image
# faces = face_cascade.detectMultiScale(image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))



# annotation = [{'x_min': (x + w/2)/640, 'y_min': (y+h/2)/640, 'x_max': w/640, 'y_max': h/640} for (x, y, w, h) in faces]
# print(annotation)

# for (x,y,w,h) in faces:
#   x_min=(x - w / 2)/640
#   y_min=(y - h / 2)/640
#   x_max=(x + w / 2)/640
#   y_max=(y + h / 2)/640
#   print(x_min,y_min,x_max,y_max)
#   print(x/640,y/640,w/640,h/640)
#   cv2.rectangle(image,(x+w,y+h),(x,y),(0,255,0),2)
# cv2_imshow(image)

