In [1]:
# Install the packages listed below into the running kernel's environment (%pip targets the kernel, unlike !pip).
# NOTE(review): nothing is version-pinned here, whereas the pipeline components pin their own
# dependencies through packages_to_install — consider pinning these too for reproducibility.
%pip install tensorflow numpy opencv-python-headless firebase_admin google-cloud-storage google-cloud-bigquery google-cloud-aiplatform google-cloud-pipeline-components kfp scikit-image ultralytics

Note: you may need to restart the kernel to use updated packages.


DEPRECATION: Loading egg at c:\python311\lib\site-packages\mask_rcnn-2.1-py3.11.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330

[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
# Importing necessary modules

import os
from google.cloud import aiplatform

from kfp.dsl import pipeline, component
from kfp import compiler

# Pipeline

### Setup

In [2]:
# Google Cloud project handed to aiplatform.init() in the setup cells.
PROJECT_ID = "plenary-truck-411220"
# GCS prefix from which the pipeline's pipeline_root is derived (see the @pipeline decorator).
PIPELINE_ROOT = "gs://faces_for_clusters"

In [3]:
# Point the Google client libraries at a service-account key file.
# The path is relative, so it resolves against the notebook's current working directory.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "../keys/detectionKey.json"

In [4]:
# Initialise the Vertex AI SDK with the project and region used by the job and schedule cells below.
aiplatform.init(project=PROJECT_ID, location="asia-south1")

### Components

In [5]:
# Detect people in the marker images from the previous 6-hour window, store the person crops for clustering, and report whether any were found

@component(base_image="python:3.10", packages_to_install=["numpy==1.23.5", "kfp==2.6.0", "google-cloud-storage==2.14.0", "scikit-image==0.22.0", "ultralytics==8.1.12", "opencv-contrib-python-headless==4.9.0.80", "firebase-admin==6.4.0", "google-cloud-firestore==2.14.0", "pytz==2022.7.1"])
def test_images(threshold: float)->bool: # type: ignore
    """Run person detection on recent marker images and persist the results.

    Marker documents from the previous 6-hour window (aligned to 00/06/12/18
    Asia/Kolkata time) are read from Firestore, each image is passed through a
    YOLOv8 model, and the per-image outcome is pickled to the
    ``faces_for_clusters`` bucket as ``verified_file.pkl``.

    Args:
        threshold: Confidence a "person" detection must exceed to count.

    Returns:
        True if at least one image contained a person above the threshold.
    """

    # All imports live inside the function: the component body runs on its own
    # inside the container and cannot see the notebook's namespace.
    from ultralytics import YOLO
    import firebase_admin
    from firebase_admin import firestore, credentials
    from datetime import datetime, timedelta
    from google.cloud.firestore_v1.base_query import FieldFilter
    from google.cloud import storage
    from skimage import io
    import pytz
    import pickle

    class human_detection:
        """Fetches marker images, detects people in them and stores the outcome."""

        def __init__(self):
            self.model = YOLO("yolov8n.pt")
            # 0 -> ids of images without a person,
            # 1 -> [id, list of person crops, time] for images with at least one person.
            self.image_verification = {0: [], 1: []}
            # [id, image array, time] for every marker fetched from Firestore.
            self.markerImages = []
            self.BUCKET_NAME = "faces_for_clustersdetection_pipeline"
            # Initialise a client
            self.storage_client = storage.Client("plenary-truck-411220")
            self.bucket = self.storage_client.get_bucket(self.BUCKET_NAME)
            # Download the Firebase service-account key next to the script
            self.blob = self.bucket.blob("keys/serviceAccountKey.json")
            self.blob.download_to_filename("serviceAccountKey.json")
            # Download the GCP service-account key next to the script
            self.blob_2 = self.bucket.blob("keys/detectionKey.json")
            self.blob_2.download_to_filename("detectionKey.json")

        def fetch_data(self):
            """Load the images of all markers created in the previous 6-hour window."""

            FIREBASE_CRED = "serviceAccountKey.json"

            cred = credentials.Certificate(FIREBASE_CRED)
            firebase_admin.initialize_app(cred)

            db = firestore.client()

            indian_timezone = pytz.timezone('Asia/Kolkata')

            # Ask for the current time directly in IST. Taking a naive
            # datetime.now() and localize()-ing it only relabels the host's
            # wall clock as IST, which shifts the window whenever the host
            # (e.g. a UTC container) is not itself running on IST.
            current_datetime = datetime.now(indian_timezone)
            # Snap down to the most recent 00/06/12/18 boundary.
            rounded_hour = (current_datetime.hour // 6) * 6
            rounded_datetime = current_datetime.replace(hour=rounded_hour, minute=0, second=0, microsecond=0)

            start_time = rounded_datetime - timedelta(hours=6)
            end_time = rounded_datetime

            markers = db.collection("Markers").where(filter=FieldFilter('time', ">=", start_time)).where(filter=FieldFilter('time', "<=", end_time)).get()
            for obj in markers:
                data = obj.to_dict()
                if data:
                    img = io.imread(data["imageUrl"])
                    date = data["time"] if "time" in data else datetime.now()
                    self.markerImages.append([data["id"], img, date])

        def forward(self, THRESHOLD_SCORE):
            """Classify every fetched image by whether it contains a confident "person" box."""

            for (image_id, img_init, date) in self.markerImages:

                detection = False
                cropped_images = []
                # NOTE(review): skimage.io.imread normally yields RGB arrays, while the
                # Ultralytics docs describe numpy inputs as BGR — confirm whether the
                # channel order should be flipped before inference.
                results = self.model(img_init)

                # Extract bounding boxes, classes, names, and confidences
                boxes = results[0].boxes.xyxy.tolist()
                classes = results[0].boxes.cls.tolist()
                names = results[0].names
                confidences = results[0].boxes.conf.tolist()

                # Iterate through the results
                for box, cls, conf in zip(boxes, classes, confidences):
                    x_min, y_min, x_max, y_max = (int(coord) for coord in box)
                    name = names[int(cls)]

                    if name == "person":
                        if ((conf > THRESHOLD_SCORE) and (conf <= 1.0)):
                            detection = True
                            cropped_images.append(img_init[y_min:y_max, x_min:x_max])

                if detection:
                    self.image_verification[1].append([image_id, cropped_images, date])
                else:
                    self.image_verification[0].append(image_id)

        def get_dict(self):
            """Upload the verification dict as ``verified_file.pkl`` and return it."""

            CLOUD_CRED = "detectionKey.json"
            # Connecting to GCP
            storage_client = storage.Client.from_service_account_json(CLOUD_CRED)

            # Connecting to Bucket
            bucket = storage_client.get_bucket('faces_for_clusters')

            # Remove the previous run's pickle before uploading the new one.
            delete_prev_pkl = bucket.blob("verified_file.pkl")
            if delete_prev_pkl.exists():
                delete_prev_pkl.delete(if_generation_match=None)

            with open("verified_file.pkl", "wb") as f:
                pickle.dump(self.image_verification, f)

            object_name = bucket.blob("verified_file.pkl")
            object_name.upload_from_filename("verified_file.pkl")

            return self.image_verification

    model = human_detection()
    model.fetch_data()
    model.forward(threshold)
    verification = model.get_dict()[1]

    return len(verification) != 0

In [6]:
# Inserting data into clustering table for further usage

@component(base_image="python:3.10", packages_to_install=["numpy==1.23.5", "opencv-contrib-python-headless==4.9.0.80", "scikit-image==0.22.0", "firebase-admin==6.4.0", "google-cloud-storage==2.14.0", "google-cloud-bigquery==3.17.2", "kfp==2.6.0"])
def insert_table(verification_exists: bool):
    """Upload the detected person crops and register them in BigQuery.

    Reads ``verified_file.pkl`` from the ``faces_for_clusters`` bucket, writes
    every crop as ``CroppedImages/<id>_<n>.png`` to that bucket and inserts one
    row per crop into ``clustering_dataset.faces``.

    Args:
        verification_exists: Output of the detection step; when False the
            component does nothing.

    Raises:
        RuntimeError: If BigQuery reports errors for any inserted row.
    """

    # All imports live inside the function: the component body runs on its own
    # inside the container, so the notebook-level `import os` is not visible here.
    import os
    import pickle
    import tempfile

    import cv2
    import numpy as np
    from google.cloud import storage, bigquery

    BUCKET_NAME_PIPELINE = "faces_for_clustersdetection_pipeline"
    VERIFICATION_BUCKET = "faces_for_clusters"
    CLOUD_CRED = "detectionKey.json"

    if not verification_exists:
        return

    # Initialise a client
    storage_client = storage.Client("plenary-truck-411220")

    # Fetch the service-account key used for the uploads and BigQuery below.
    bucket_pipeline = storage_client.get_bucket(BUCKET_NAME_PIPELINE)
    bucket_pipeline.blob("keys/detectionKey.json").download_to_filename(CLOUD_CRED)

    # Fetch the detection results written by the previous pipeline step.
    ver_bucket = storage_client.get_bucket(VERIFICATION_BUCKET)
    ver_bucket.blob("verified_file.pkl").download_to_filename("verified_file.pkl")

    # NOTE(security): pickle.load executes arbitrary code from the file; this is
    # only acceptable while the bucket is writable solely by this pipeline.
    with open('verified_file.pkl', 'rb') as f:
        data = pickle.load(f)

    # Connecting to GCP
    storage_client = storage.Client.from_service_account_json(CLOUD_CRED)

    # Connecting to Bucket
    bucket = storage_client.get_bucket(VERIFICATION_BUCKET)

    bigquery_client = bigquery.Client.from_service_account_json(CLOUD_CRED)

    table_ref = bigquery_client.dataset("clustering_dataset").table("faces")
    table = bigquery_client.get_table(table_ref)

    rows = []

    # NOTE(review): this removes only an object literally named "CroppedImages/";
    # objects stored under that prefix are left in place. Confirm whether the
    # intent was to clear every previous crop.
    delete_cropped_folder = bucket.blob("CroppedImages/")
    if delete_cropped_folder.exists():
        delete_cropped_folder.delete(if_generation_match=None)

    # data[1] holds [id, crops, time] for every image that contained a person.
    with tempfile.TemporaryDirectory() as temp_dir:
        for (image_id, cropped_images, date) in data[1]:
            for (num, img) in enumerate(cropped_images):
                img = np.asarray(img, dtype='uint8')
                if img.size == 0:
                    # A zero-width/height box yields an empty crop that cvtColor rejects.
                    continue
                crop_name = str(image_id) + "_" + str(num)
                filename = crop_name + ".png"
                local_path = os.path.join(temp_dir, filename)
                # Crops are stored as RGB; OpenCV writes BGR.
                cv2.imwrite(local_path, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
                object_name = bucket.blob("CroppedImages/" + filename)
                object_name.upload_from_filename(local_path, content_type='image/png')
                rows.append((crop_name, object_name.public_url, date, None))

    if rows:
        # insert_rows reports failures through its return value, not by raising.
        errors = bigquery_client.insert_rows(table, rows)
        if errors:
            raise RuntimeError(f"BigQuery insert failed for {len(errors)} row(s): {errors}")

### Pipeline

In [7]:
# Two-step pipeline: detect people in recent images, then publish the crops.
# The root resolves to gs://faces_for_clustersdetection_pipeline (no "/" is inserted),
# which is the same bucket name the components download their keys from.
@pipeline(name="detection_pipeline", pipeline_root=f"{PIPELINE_ROOT}detection_pipeline")
def detection_pipeline():
    # Step 1: returns whether any image contained a person above the 0.6 confidence threshold.
    detection_step = test_images(threshold=0.6)
    # Step 2: consumes that flag; wiring the output also orders this step after step 1.
    insert_table(verification_exists=detection_step.output)

### Compiling

In [8]:
# Compile the pipeline function into a YAML spec on disk; the PipelineJob cell loads this file.
compiler.Compiler().compile(
    pipeline_func=detection_pipeline, package_path="detection_pipeline.yaml" # type: ignore
)

In [9]:
# Build a Vertex AI pipeline job from the compiled spec.
# It is started separately, either by job.submit() or through the schedule.
job = aiplatform.PipelineJob(
    display_name="detection_pipeline",
    template_path="./detection_pipeline.yaml"
)

### Run

In [None]:
# Submit a one-off run that executes under the dedicated detection service account.
job.submit(
    service_account="detection@plenary-truck-411220.iam.gserviceaccount.com"
)

### Schedule

In [10]:
# Wrap the pipeline job in a schedule object; the schedule itself is created by job_schedule.create().
job_schedule = aiplatform.PipelineJobSchedule(
    pipeline_job=job,
    display_name="detection_pipeline"
)

In [None]:
# Run every 6 hours, one run at a time.
# Scheduled runs use the same service account as the manual job.submit() call, so
# both paths execute with identical permissions instead of the project default.
# NOTE(review): the component aligns its window to 00/06/12/18 Asia/Kolkata time;
# confirm which timezone this cron expression is evaluated in.
job_schedule.create(
    cron="0 */6 * * *",
    max_concurrent_run_count=1,
    service_account="detection@plenary-truck-411220.iam.gserviceaccount.com",
)