In [1]:
import os
import io
import datetime
import requests
import pandas as pd
from io import BytesIO
from PIL import Image, ImageDraw
from urllib.parse import urlparse
import glob, os, sys, time, uuid
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt
from pathlib import Path
from dotenv import load_dotenv

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient
from video_indexer import VideoIndexer
from azure.cognitiveservices.vision.face import FaceClient
from azure.cognitiveservices.vision.face.models import TrainingStatusType
from msrest.authentication import CognitiveServicesCredentials
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateBatch, ImageFileCreateEntry, Region
from msrest.authentication import ApiKeyCredentials
from azure.core.exceptions import ResourceNotFoundError
from azure.ai.formrecognizer import FormTrainingClient
load_dotenv()

True

In [2]:
# Central lookup of service settings used by the client constructors below.
# Every value except LOCATION is read from the process environment and is
# None when the corresponding variable is not set.
CONFIG = {}

# Video Indexer account settings.
CONFIG['SUBSCRIPTION_KEY'] = os.environ.get("SUBSCRIPTION_KEY")
CONFIG['LOCATION'] = 'trial'
CONFIG['ACCOUNT_ID'] = os.environ.get("ACCOUNT_ID")

# Face service.
CONFIG['FACIAL_RECOGNITION_ENDPOINT'] = os.environ.get('AZURE_FACIAL_RECOGNIZER_ENDPOINT')
CONFIG['FACIAL_RECOGNITION_KEY'] = os.environ.get('AZURE_FACIAL_RECOGNIZER_KEY')

# Form Recognizer service.
CONFIG['FORM_RECOGNITION_ENDPOINT'] = os.environ.get('AZURE_FORM_RECOGNIZER_ENDPOINT')
CONFIG['FORM_RECOGNITION_KEY'] = os.environ.get('AZURE_FORM_RECOGNIZER_KEY')

# Custom Vision object detection: training resource.
CONFIG['OBJECT_DETECTION_TRAINING_ENDPOINT'] = os.environ.get('OBJECT_DETECTION_TRAINING_ENDPOINT')
CONFIG['OBJECT_DETECTION_TRAINING_KEY'] = os.environ.get('OBJECT_DETECTION_TRAINING_KEY')
CONFIG['OBJECT_DETECTION_TRAINING_RESOURCE_ID'] = os.environ.get('OBJECT_DETECTION_TRAINING_RESOURCE_ID')

# Custom Vision object detection: prediction resource.
CONFIG['OBJECT_DETECTION_PREDICTION_ENDPOINT'] = os.environ.get('OBJECT_DETECTION_PREDICTION_ENDPOINT')
CONFIG['OBJECT_DETECTION_PREDICTION_KEY'] = os.environ.get('OBJECT_DETECTION_PREDICTION_KEY')
CONFIG['OBJECT_DETECTION_PREDICTION_RESOURCE_ID'] = os.environ.get('OBJECT_DETECTION_PREDICTION_RESOURCE_ID')

In [3]:
# Form Recognizer: one client for running recognition, one for training custom models.
form_recognizer_client = FormRecognizerClient(
    endpoint=CONFIG['FORM_RECOGNITION_ENDPOINT'],
    credential=AzureKeyCredential(CONFIG['FORM_RECOGNITION_KEY']),
)
form_training_client = FormTrainingClient(
    endpoint=CONFIG['FORM_RECOGNITION_ENDPOINT'],
    credential=AzureKeyCredential(CONFIG['FORM_RECOGNITION_KEY']),
)

# Face: the credential must wrap the subscription KEY; the endpoint is only the
# first argument (the original passed the endpoint URL as the credential).
face_client = FaceClient(
    CONFIG['FACIAL_RECOGNITION_ENDPOINT'],
    CognitiveServicesCredentials(CONFIG['FACIAL_RECOGNITION_KEY']),
)

# Custom Vision: training and prediction use separate endpoints and header-based keys.
training_credentials = ApiKeyCredentials(in_headers={"Training-key": CONFIG['OBJECT_DETECTION_TRAINING_KEY']})
trainer = CustomVisionTrainingClient(CONFIG['OBJECT_DETECTION_TRAINING_ENDPOINT'], training_credentials)

prediction_credentials = ApiKeyCredentials(in_headers={"Prediction-key": CONFIG['OBJECT_DETECTION_PREDICTION_KEY']})
predictor = CustomVisionPredictionClient(CONFIG['OBJECT_DETECTION_PREDICTION_ENDPOINT'], prediction_credentials)

In [4]:
# Video Indexer client, configured from the account settings held in CONFIG.
video_analysis = VideoIndexer(
    vi_subscription_key=CONFIG['SUBSCRIPTION_KEY'],
    vi_account_id=CONFIG['ACCOUNT_ID'],
    vi_location=CONFIG['LOCATION'],
)

Multiple Digital ID Text Extraction

In [31]:
# Run identity-document recognition over every ca-dl-*.png test image and keep
# the recognition result of each file, in file-name order.
list_of_id_results = []
# sorted(): glob.glob returns matches in arbitrary, OS-dependent order, and the
# results are later accessed by position (e.g. list_of_ids[0]).
test_images = sorted(glob.glob("./data/digital_id_template/Test-Images/ca-dl-*.png"))
for image_path in test_images:
    with open(image_path, "rb") as test_data:
        results = form_recognizer_client.begin_recognize_identity_documents(test_data, content_type="image/png")
    # results is a poller; result() blocks until recognition of this image is done.
    list_of_id_results.append(results.result())

In [32]:
# One plain dict per ID image: field name -> extracted value, taken from the
# first recognized document of each result.
list_of_ids = [
    {field_name: field.value for field_name, field in recognized_docs[0].fields.items()}
    for recognized_docs in list_of_id_results
]

In [33]:
list_of_ids[0]  # First example: the field-name -> value dict built for the first ID image

{'Address': '28 Tesla Avenue, San Mateo, CA, 94403',
 'CountryRegion': 'USA',
 'DateOfBirth': datetime.date(1995, 9, 8),
 'DateOfExpiration': datetime.date(2026, 1, 1),
 'DocumentNumber': 'D4204209',
 'FirstName': 'Daniel',
 'LastName': 'Da Cruz',
 'Region': 'California',
 'Sex': 'M'}

Custom Text Extraction Model - Boarding Passes

In [34]:
# SAS URL of the blob container that holds the training forms for the custom model.
# The URL embeds a shared-access signature (a credential), so it is read from the
# environment (e.g. the .env file loaded by load_dotenv()) instead of being
# committed in the notebook.
training_images_url = os.getenv("CUSTOM_FORM_TRAINING_SAS_URL")
if not training_images_url:
    raise ValueError(
        "Set CUSTOM_FORM_TRAINING_SAS_URL to the SAS URL of the container with the training forms."
    )

# Train with the label files stored alongside the forms; result() blocks until
# training has finished and returns the trained model description.
training_process = form_training_client.begin_training(training_images_url, use_training_labels=True)
custom_model = training_process.result()

In [41]:
# Re-fetch the trained model by id and print its key metadata, one line per item.
custom_model_info = form_training_client.get_custom_model(model_id=custom_model.model_id)
for line_template, detail in (
    ("Model ID: {}", custom_model_info.model_id),
    ("Status: {}", custom_model_info.status),
    ("Training started on: {}", custom_model_info.training_started_on),
    ("Training completed on: {}", custom_model_info.training_completed_on),
):
    print(line_template.format(detail))

Model ID: 14967f9f-d0c3-4fce-b267-565242c59494
Status: ready
Training started on: 2022-11-08 14:31:23+00:00
Training completed on: 2022-11-08 14:31:41+00:00


In [42]:
# Run the custom model over every boarding-pass PDF and keep the raw result per file.
list_of_boarding_pass_results = []

# sorted(): glob.glob returns matches in arbitrary, OS-dependent order, and
# boarding_pass_results is indexed by position in later cells.
test_images = sorted(glob.glob("./data/boarding_pass_template/Test-Images/*.pdf"))
for image_path in test_images:
    with open(image_path, "rb") as test_data:
        results = form_recognizer_client.begin_recognize_custom_forms(
            model_id=custom_model_info.model_id,
            form=test_data,
            content_type='application/pdf',
        )
    # results is a poller; result() blocks until recognition of this PDF is done.
    list_of_boarding_pass_results.append(results.result())

# One plain dict per boarding pass: field name -> extracted value, taken from the
# first recognized form of each result.
boarding_pass_results = [
    {field_name: field.value for field_name, field in recognized_forms[0].fields.items()}
    for recognized_forms in list_of_boarding_pass_results
]

In [43]:
# Display the extracted field-name -> value dict of the boarding pass at index 4.
boarding_pass_results[4]

{'Seat': '24B',
 'Departure Destination': 'New York',
 'Carrier': 'ZA',
 'Arrival Destination': 'San Francisco',
 'Class': 'D',
 'Airline': 'UDACITY AIRLINES',
 'Boarding Gate': 'I2',
 'Boarding Time': None,
 'Flight Number': '820',
 'Ticket Number': 'ETK-737268572620C',
 'Baggage': 'YES',
 'Passenger Name': 'Noah Taleb',
 'Date': 'November 15, 2022'}

In [44]:
# Digital-ID image URL for each passenger, keyed by the passenger-name slug
# (lower-cased name with whitespace replaced by "-", matching the blob file names).
# NOTE(review): these URLs embed SAS signatures; consider moving them out of the
# notebook (e.g. into environment variables) and regenerating the tokens.
ID_IMAGE_URLS = {
    "daniel-da-cruz": "https://udacitystorageaccount111.blob.core.windows.net/digital-id/ca-dl-daniel-da-cruz.png?sp=r&st=2022-11-08T12:22:34Z&se=2022-11-15T20:22:34Z&spr=https&sv=2021-06-08&sr=b&sig=VJwPE5wDWXI6gU3WEMcdFwKSjrVTfa%2FsTl3L3jZnL5c%3D",
    "helena-da-cruz": "https://udacitystorageaccount111.blob.core.windows.net/digital-id/ca-dl-helena-da-cruz.png?sp=r&st=2022-11-08T12:23:27Z&se=2022-11-15T20:23:27Z&spr=https&sv=2021-06-08&sr=b&sig=BlN0Le%2BcLXKxolJ6fj2DH3sIhNKi10d23DtOxlVqox0%3D",
    "john-doe": "https://udacitystorageaccount111.blob.core.windows.net/digital-id/ca-dl-john-doe.png?sp=r&st=2022-11-08T12:24:46Z&se=2022-11-15T20:24:46Z&spr=https&sv=2021-06-08&sr=b&sig=%2FIgg7RM3H2DBKjsbsf3S1t%2BX0iA8pwxKOOlbKuXFEDM%3D",
    "mark-musk": "https://udacitystorageaccount111.blob.core.windows.net/digital-id/ca-dl-mark-musk.png?sp=r&st=2022-11-08T12:25:36Z&se=2022-11-15T20:25:36Z&spr=https&sv=2021-06-08&sr=b&sig=%2F81fcgnXTo77uoiXPQSUEDlpZ7ZN8rlKPScFPliPbbg%3D",
    "noah-taleb": "https://udacitystorageaccount111.blob.core.windows.net/digital-id/ca-dl-noah-taleb.png?sp=r&st=2022-11-08T12:26:04Z&se=2022-11-15T20:26:04Z&spr=https&sv=2021-06-08&sr=b&sig=yhBO8WyBRxwWI62nvfz%2FOM1eP34fGtWEPEW%2FbZ5eyoE%3D",
}

# Attach each URL by passenger name rather than by list position, so a different
# ordering of boarding_pass_results can never pair a passenger with someone else's ID.
for boarding_pass in boarding_pass_results:
    passenger_slug = "-".join(boarding_pass['Passenger Name'].lower().split())
    if passenger_slug in ID_IMAGE_URLS:
        boarding_pass['image_url'] = ID_IMAGE_URLS[passenger_slug]
    else:
        # Leave the entry without an image_url, but make the gap visible.
        print("No digital ID image URL known for passenger: {}".format(passenger_slug))

Upload Video Samples

In [45]:
# Print each passenger name as a lower-case, hyphen-separated slug.
for boarding_pass in boarding_pass_results:
    name_parts = boarding_pass['Passenger Name'].lower().split()
    print("-".join(name_parts))

daniel-da-cruz
helena-da-cruz
john-doe
mark-musk
noah-taleb


In [46]:
# Upload each passenger's video to Video Indexer from local disk and collect the
# returned video ids, in the same order as boarding_pass_results.
uploaded_video_ids = []
for i in boarding_pass_results:
    # Lower-case, hyphen-separated passenger name; used for both the local file
    # name and the video name.
    passenger_slug = "-".join(i['Passenger Name'].lower().split())
    uploaded_video_id = video_analysis.upload_to_video_indexer(
        # os.path.join instead of a backslash literal: '\d' and '\{' are invalid
        # escape sequences, and a backslash path only resolves on Windows.
        input_filename=os.path.join('data', 'digital-video-sample', '{}.mp4'.format(passenger_slug)),
        video_name='{}-boarding-pass'.format(passenger_slug),  # unique identifier for video in Video Indexer platform
        video_language='English'
    )
    uploaded_video_ids.append(uploaded_video_id)

Uploading video to video indexer...
Uploading video to video indexer...
Uploading video to video indexer...
Uploading video to video indexer...
Uploading video to video indexer...


In [47]:
# Pause for a fixed 300 seconds before requesting the video info
# (presumably to give Video Indexer time to finish indexing the uploads — confirm).
time.sleep(300)

# Fetch the info of every uploaded video, in upload order.
video_infos = [
    video_analysis.get_video_info(video_id, video_language='English')
    for video_id in uploaded_video_ids
]

Getting video info for: e7cde667fe
Getting video info for: be90548748
Getting video info for: 8c926fee41
Getting video info for: b402067a2a
Getting video info for: d6e3005610


In [61]:
# For each indexed video, download every thumbnail of the first entry in its
# 'faces' insights and save the images as
# ./data/ai-generated-thumbnails/<video name>/human-face<N>.jpg.
thumbnail_directory = "./data/ai-generated-thumbnails/"
final_thumbnails = []
for video_id, video_info in zip(uploaded_video_ids, video_infos):
    images = []      # decoded PIL images
    img_raw = []     # in-memory streams backing the images
    img_strs = []    # raw bytes as returned by Video Indexer
    thumbnails = []  # thumbnail ids that were downloaded
    for each_thumb in video_info['videos'][0]['insights']['faces'][0]['thumbnails']:
        # Only entries carrying both a file name and an id are downloaded.
        if 'fileName' not in each_thumb or 'id' not in each_thumb:
            continue
        thumb_id = each_thumb['id']
        img_code = video_analysis.get_thumbnail_from_video_indexer(video_id, thumb_id)
        img_stream = io.BytesIO(img_code)
        img_strs.append(img_code)
        img_raw.append(img_stream)
        images.append(Image.open(img_stream))
        thumbnails.append(thumb_id)

    name = video_info['name']
    # Create the per-video folder first; Image.save does not create missing directories.
    output_directory = os.path.join(thumbnail_directory, name)
    os.makedirs(output_directory, exist_ok=True)
    for j, img in enumerate(images, start=1):
        img.save(os.path.join(output_directory, 'human-face' + str(j) + '.jpg'))

    # Record the last downloaded thumbnail id of THIS video (None when there was
    # none) instead of relying on a variable left over from the inner loop.
    final_thumbnails.append(thumbnails[-1] if thumbnails else None)
    print("Thumbnails saved to {}".format(thumbnail_directory))

Getting thumbnail from video: e7cde667fe, thumbnail: f49ce56f-eed5-4ccd-bd5a-1a1748001748
Getting thumbnail from video: e7cde667fe, thumbnail: 95b6b681-8482-445d-8b28-495af4dbb1f0
Getting thumbnail from video: e7cde667fe, thumbnail: f5a4a298-9f68-4765-83a3-dc206fe5d4c4
Getting thumbnail from video: e7cde667fe, thumbnail: 700b537b-0bb9-45bf-8be6-7381a46f1856
Getting thumbnail from video: e7cde667fe, thumbnail: 35bfb3ba-d447-4c25-bfae-a7f6ce93828f
Getting thumbnail from video: e7cde667fe, thumbnail: ce7bdd27-5c29-4f21-b45f-65d45250957b
Getting thumbnail from video: e7cde667fe, thumbnail: b40043ed-58ee-4592-8e7b-22f528eee668
Getting thumbnail from video: e7cde667fe, thumbnail: 3a523b55-a94e-4f58-99c3-7bcf00a51f7d
Thumbnails saved to ./data/ai-generated-thumbnails/
Getting thumbnail from video: be90548748, thumbnail: 14ec2d30-bf76-4581-b409-209e69227770
Getting thumbnail from video: be90548748, thumbnail: 9e42a25c-2167-4ca0-929b-4ec4953a5725
Getting thumbnail from video: be90548748, thumbn

In [None]:
# Fetch a single thumbnail again. This relies on `uploaded_video_id` and
# `thumbnails` as left behind by the loops in earlier cells (the last value each
# loop assigned), so the result depends on the kernel state from those cells.
img_code = video_analysis.get_thumbnail_from_video_indexer(uploaded_video_id,  thumbnails[0])