# Import Libraries

In [1]:
%%capture
!pip install --upgrade fastapi ffmpeg uvicorn python-multipart tensorflow-gpu scikit-image imutils wandb tensorflow_hub Pillow

from fastapi import FastAPI, File, UploadFile
from skimage.metrics import structural_similarity as compare_ssim

import tensorflow_hub as hub
import numpy as np
import tensorflow as tf
from PIL import Image
import label_map_util
import ffmpeg, shutil

from google.cloud import storage
import nest_asyncio, uvicorn, os, pathlib

import cv2, wandb

# Set Up Google Cloud Parameters


## Set Up Google Cloud Project and Model Location

In [2]:
# Google Cloud project name.
project = "mlops-content1"
# Name of the Cloud Storage bucket used for storage; other cells pass it to
# `storage_client.bucket(...)` and `storage_client.list_blobs(...)`.
location = "james-mlops-capstone"

## Create Storage Bucket

In [3]:
# Authenticate against Google Cloud Storage with the service-account key file
# expected in the notebook's working directory.
# NOTE(review): make sure `gcp-key.json` is kept out of version control.
storage_client = storage.Client.from_service_account_json("gcp-key.json")

# Module-level handle for the bucket named by `location`; the helper functions
# below use it to create blobs for download and upload.
bucket = storage_client.bucket(location)

## Double Check Cloud Bucket (Development Code Only)

In [4]:
%%capture
# Development-only sanity check: iterate over every blob in the bucket named by
# `location` and print its name. Because the cell starts with `%%capture`, the
# printed names are captured instead of being shown in the notebook output.
blobs = storage_client.list_blobs(location)
for blob in blobs:
    print(blob.name)

# WandB Functions

In [5]:
def init_wandb(project_name):
    """Start a Weights & Biases run for the given project.

    The project name is converted to ``str`` and stored in the module-level
    ``wandb_project`` variable, then ``wandb.init`` is called for that project
    with ``sync_tensorboard=True``.

    Args:
        project_name: Name of the W&B project (any value; ``str()`` is applied).

    Returns:
        bool: Always ``True``.
    """
    global wandb_project
    wandb_project = str(project_name)
    wandb.init(project=wandb_project, sync_tensorboard=True)
    return True

# Set File Location

In [6]:
def set_folder_location(in_location):
    """Switch the notebook to a different Cloud Storage bucket.

    Rebinds the module-level ``location`` to ``str(in_location)`` and the
    module-level ``bucket`` to the matching handle from ``storage_client``.

    Args:
        in_location: Name of the bucket to use from now on.

    Returns:
        bool: Always ``True``.
    """
    global location, bucket
    location = str(in_location)
    bucket = storage_client.bucket(location)
    return True

# Split Video to Frames and Upload

## Download Video to Local Instance

In [7]:
def download_video(video_name):
    """Download the blob ``video_name`` from the current bucket.

    The blob is saved to a local file with the same name, relative to the
    current working directory.

    Args:
        video_name: Blob name inside the module-level ``bucket``.
    """
    # print() applies str() to each argument and joins them with one space,
    # giving the same text as "Downloading: " + str(video_name).
    print("Downloading:", video_name)
    bucket.blob(video_name).download_to_filename(video_name)

## Break down video to frames

In [8]:
def split_video_frames(video_name):
    """Decode a local video with OpenCV and save every frame as a PNG.

    Frames are written to a folder named after the video with its last four
    characters removed (e.g. ``clip.mp4`` -> ``clip``), as ``frame00001.png``,
    ``frame00002.png``, ... Any existing folder of that name is deleted first
    so frames from an earlier run cannot mix with the new ones.

    Args:
        video_name: Path of the local video file.

    Raises:
        OSError: If the output folder cannot be removed or created.
    """
    video_name = str(video_name)
    print("Splitting: " + video_name)
    folder = video_name[:-4]

    # A missing folder is the normal first-run case; any other failure to
    # clear it is reported instead of being silently swallowed.
    try:
        shutil.rmtree(folder)
    except FileNotFoundError:
        pass
    os.mkdir(folder)

    video_capture = cv2.VideoCapture(video_name)
    saved_frame_name = 1
    try:
        while True:
            print("Frame: " + format(saved_frame_name, '05d'), end="\r")
            success, frame = video_capture.read()
            if not success:
                # read() reports failure once no further frame can be decoded.
                break
            cv2.imwrite(f"{folder}/frame{format(saved_frame_name, '05d')}.png", frame)
            saved_frame_name += 1
    finally:
        # Always release the capture, even if reading or writing fails.
        video_capture.release()
    print("Done                       ")

## Upload Video Frames

In [9]:
def upload_frames_from_folder(folder_name):
    """Upload every file in ``folder_name`` to the current bucket.

    Files are processed in sorted name order. Each blob is named
    ``<folder_name>/<file name>``, i.e. the same relative path it has locally.

    Args:
        folder_name: Local directory whose files are uploaded (a ``str``,
            because it is concatenated into the blob name).
    """
    file_names = os.listdir(str(folder_name))
    file_names.sort()

    print("Uploading Frames")
    for file_name in file_names:
        # Trailing spaces + carriage return overwrite the previous progress line.
        print(file_name + "             ", end="\r")
        relative_path = folder_name + "/" + file_name
        bucket.blob(relative_path).upload_from_filename(relative_path)

    print("Done Uploading               ", end="\r")

## SSIM Compare Video Frames for Novel Frames

## Remove Blurry Images from Set

### Calculate Blurriness using Laplacian

In [10]:
def variance_of_laplacian(image):
    """Return the focus measure of ``image``: the variance of its Laplacian.

    The Laplacian is computed by OpenCV with ``cv2.CV_64F`` as output depth.

    Args:
        image: Image passed straight to ``cv2.Laplacian``.

    Returns:
        The value of ``.var()`` on the Laplacian response.
    """
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    return laplacian.var()

### Remove Blurry Images

In [11]:
def remove_blurry_images(folder_name):
    """Delete frames whose Laplacian variance is well above the batch median.

    Every non-hidden file in ``folder_name`` is read with OpenCV, converted to
    grayscale and scored with ``variance_of_laplacian``. Files whose score is
    more than 5 % above the median score are deleted from disk. The per-frame
    scores, the batch median and the fraction of removed frames are logged to
    Weights & Biases.

    NOTE(review): a high Laplacian variance usually indicates a sharp (or
    noisy) frame and a low one a blurry frame, so despite its name this
    function removes the high-variance ("noisy") frames. The original
    direction is kept; confirm it is the intended one.

    Args:
        folder_name: Directory containing the extracted frames.
    """
    folder = str(folder_name)
    # Skip hidden entries (e.g. ``.ipynb_checkpoints``) explicitly. The previous
    # ``files[1:]`` dropped whatever sorted first, which silently exempted the
    # first real frame whenever no hidden entry was present.
    files = [name for name in sorted(os.listdir(folder)) if not name.startswith('.')]

    if not files:
        # Nothing to score: avoids a NaN median and a division by zero below.
        print("Done Checking Frames                  ")
        return

    blurriness = np.zeros(len(files))

    print("Calculating Average Blurriness")
    for i, name in enumerate(files):
        print(name + "             ", end="\r")
        img = cv2.imread(folder + '/' + name)
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        blurriness[i] = variance_of_laplacian(img_gray)
        wandb.log({'Individual Laplacian': blurriness[i]})

    median_blur = np.median(blurriness)
    wandb.log({'Batch Median Laplacian': median_blur})
    print("Median Blur (Laplacian Variance): " + str(median_blur))
    # Frames scoring more than 5 % above the median are discarded.
    blur_cutoff = median_blur * 1.05
    print("Blur Cutoff (Laplacian Variance): " + str(blur_cutoff))

    print("Removing Noisy Images")

    count = 0
    for name, score in zip(files, blurriness):
        if score > blur_cutoff:
            os.remove(folder + '/' + name)
            count += 1
    blur_ratio = count / len(files)
    wandb.log({'Noisy Frame Ratio': blur_ratio})
    print("Done Checking Frames                  ")

# Deduplicate Similar Frames

## Calculate Similarity Between Images

In [12]:
def compare_images(image1, image2):
    """Return the structural-similarity (SSIM) score of two BGR images.

    Both images are converted to grayscale with OpenCV before being compared
    with scikit-image's ``structural_similarity`` (imported as
    ``compare_ssim``). Only the score is returned; the full similarity image
    that ``full=True`` also produces is discarded.

    Args:
        image1: First image, in OpenCV BGR layout.
        image2: Second image, in OpenCV BGR layout.

    Returns:
        The SSIM score reported by ``compare_ssim``.
    """
    gray_first, gray_second = (
        cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) for frame in (image1, image2)
    )
    score, _similarity_map = compare_ssim(gray_first, gray_second, full=True)
    return score

## Remove Duplicates

In [13]:
def remove_duplicates(folder_name, similarity_cutoff=0.99):
    """Delete frames that are near-identical to the frame that follows them.

    Consecutive (sorted, non-hidden) files in ``folder_name`` are compared
    pairwise with ``compare_images``. Whenever the similarity of a pair exceeds
    ``similarity_cutoff`` the *earlier* file of the pair is deleted. Individual
    similarities, the batch median, the similarity of every removed frame and
    the fraction of removed frames are logged to Weights & Biases.

    Args:
        folder_name: Directory containing the extracted frames.
        similarity_cutoff: Similarity above which a frame counts as a duplicate.
            Defaults to ``0.99``, the threshold that was previously hard-coded.
            The value printed as "Similarity Cutoff" is now the one applied.
    """
    folder = str(folder_name)
    # Skip hidden entries explicitly instead of blindly dropping the first
    # sorted entry, which excluded the first real frame from the comparison.
    files = [name for name in sorted(os.listdir(folder)) if not name.startswith('.')]
    print("Removing Duplicate and Highly Similar Frames\nCalculating Frame Similarities")

    if len(files) < 2:
        # No pairs to compare; also avoids np.zeros(-1) on an empty listing.
        print("Done Checking Frames, 0 frames removed.")
        return

    diff = np.zeros(len(files) - 1)

    # Carry the previously decoded frame forward so each image is read once.
    previous = cv2.imread(folder + '/' + files[0])
    for i in range(len(files) - 1):
        current = cv2.imread(folder + '/' + files[i + 1])
        diff[i] = compare_images(previous, current)
        wandb.log({'Individual Frame Similarities': diff[i]})
        print(str(diff[i]), end="\r")
        previous = current

    median_diff = np.median(diff)
    wandb.log({'Batch Median Frame Similarity': median_diff})

    print("Similarity Cutoff (OpenCV Compare Images): " + str(similarity_cutoff))
    print("Removing Duplicate Images")

    count = 0
    for i in range(len(diff)):
        if diff[i] > similarity_cutoff:
            os.remove(folder + '/' + files[i])
            # Log the similarity of the removed frame only, not the whole array.
            wandb.log({'Duplicates Similarity': diff[i]})
            count += 1

    duplicate_ratio = count / len(files)
    wandb.log({'Batch Duplicate Remove Ratio': duplicate_ratio})
    print("Done Checking Frames, " + str(count) + " frames removed.")

# Get Insight into the Dataset using Faster RCNN Resnet50 COCO 2018/01/28

In [14]:
# Archive of the pre-trained Faster R-CNN ResNet-50 COCO detector.
# NOTE(review): the archive is fetched over plain HTTP; consider an HTTPS URL.
model_url = "http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz"

# Split the URL into its directory part (with trailing slash) and file name.
base_url, model_file = os.path.split(model_url)
base_url = base_url + "/"

# Strip both extensions (".gz", then ".tar") to get the bare model name.
model_name = os.path.splitext(os.path.splitext(model_file)[0])[0]

# Download (or reuse the Keras cache) and unpack the archive, then point at
# the SavedModel directory inside it.
model_dir = pathlib.Path(
    tf.keras.utils.get_file(fname=model_name, origin=model_url, untar=True)
) / "saved_model"

# Load the SavedModel and keep only its default serving signature, which is
# what `detect_objects` calls.
model = tf.saved_model.load(str(model_dir))
model = model.signatures["serving_default"]

2021-09-17 17:11:58.636836: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-17 17:11:58.645020: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-17 17:11:58.645874: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-17 17:11:58.647080: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [15]:
# COCO category names keyed by the category id reported by the detector.
# The TensorFlow Object Detection API COCO models use the original COCO
# category ids, which run from 1 to 90 with gaps (12, 26, 29, 30, 45, 66, 68,
# 69, 71 and 83 are unused). A contiguous 80-name list therefore mislabels
# every id above 11 and raises IndexError for ids above 80.
_COCO_ID_TO_NAME = {
    1: "person", 2: "bicycle", 3: "car", 4: "motorcycle", 5: "airplane",
    6: "bus", 7: "train", 8: "truck", 9: "boat", 10: "traffic light",
    11: "fire hydrant", 13: "stop sign", 14: "parking meter", 15: "bench",
    16: "bird", 17: "cat", 18: "dog", 19: "horse", 20: "sheep", 21: "cow",
    22: "elephant", 23: "bear", 24: "zebra", 25: "giraffe", 27: "backpack",
    28: "umbrella", 31: "handbag", 32: "tie", 33: "suitcase", 34: "frisbee",
    35: "skis", 36: "snowboard", 37: "sports ball", 38: "kite",
    39: "baseball bat", 40: "baseball glove", 41: "skateboard",
    42: "surfboard", 43: "tennis racket", 44: "bottle", 46: "wine glass",
    47: "cup", 48: "fork", 49: "knife", 50: "spoon", 51: "bowl",
    52: "banana", 53: "apple", 54: "sandwich", 55: "orange", 56: "broccoli",
    57: "carrot", 58: "hot dog", 59: "pizza", 60: "donut", 61: "cake",
    62: "chair", 63: "couch", 64: "potted plant", 65: "bed",
    67: "dining table", 70: "toilet", 72: "tv", 73: "laptop", 74: "mouse",
    75: "remote", 76: "keyboard", 77: "cell phone", 78: "microwave",
    79: "oven", 80: "toaster", 81: "sink", 82: "refrigerator", 84: "book",
    85: "clock", 86: "vase", 87: "scissors", 88: "teddy bear",
    89: "hair drier", 90: "toothbrush",
}

# Dense list indexable by class id, as `detect_objects` expects. Index 0
# (background) and the unused ids map to the empty string.
CLASS_LABELS = [_COCO_ID_TO_NAME.get(class_id, "") for class_id in range(91)]

In [16]:
def detect_objects(image):
    """Run the detector on one image file and return the detected class names.

    The image is opened with Pillow, converted to 3-channel RGB, given a
    leading batch axis and passed to the module-level ``model``. The number of
    detections is logged to Weights & Biases.

    Args:
        image: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        list[str]: One name per detection, looked up in ``CLASS_LABELS`` by
        class id. Ids outside the table are reported as ``"unknown_<id>"``
        instead of raising ``IndexError``.
    """
    # Close the file handle promptly. Converting to RGB keeps grayscale or
    # RGBA inputs at three channels (presumably what the model expects;
    # TODO confirm against the serving signature).
    with Image.open(image) as img:
        pixels = np.asarray(img.convert("RGB"))

    input_tensor = tf.convert_to_tensor(pixels)[tf.newaxis, ...]
    output_dict = model(input_tensor)
    num_detections = int(output_dict.pop('num_detections'))

    # Only the class ids are needed, so only that output is converted.
    classes = output_dict['detection_classes'][0, :num_detections].numpy().astype(np.int64)
    class_names = [
        CLASS_LABELS[class_id] if 0 <= class_id < len(CLASS_LABELS) else f"unknown_{class_id}"
        for class_id in classes
    ]
    wandb.log({'Detections per Image': num_detections})
    return class_names

In [17]:
def detect_file(folder_name):
    """Run object detection on every image in ``folder_name``.

    For each file (in sorted order) the class names returned by
    ``detect_objects`` are written, as the ``str()`` of the list, to a sidecar
    file named ``<image file>.txt`` in the same folder.

    Hidden entries and existing ``.txt`` sidecars are skipped, so the function
    can be re-run on a folder it has already annotated without trying to open
    its own output as an image.

    Args:
        folder_name: Directory containing the frames (a ``str``, because it is
            concatenated into the file paths).
    """
    for file in sorted(os.listdir(folder_name)):
        if file.startswith('.') or file.endswith('.txt'):
            continue
        total_location = folder_name + "/" + file
        classes = detect_objects(total_location)
        with open(total_location + ".txt", "w") as text_file:
            text_file.write(str(classes))

# Full Video Analysis and Upload

In [18]:
def clean_video(video_name):
    """Run the full cleaning pipeline for one video stored in the bucket.

    Steps, in order: download the video, split it into frames, remove
    high-Laplacian-variance frames, remove near-duplicate frames, run object
    detection on the remaining frames, and upload the frame folder. The local
    video file and frame folder are deleted afterwards, including when a step
    fails, so a failed run does not leave large temporary files behind.

    Args:
        video_name: Blob name of the video; also used as the local file name.
            The frame folder is the name with its last four characters
            (e.g. ``.mp4``) removed, matching ``split_video_frames``.

    Raises:
        Exception: Whatever a pipeline step raises is propagated after cleanup.
    """
    video_name = str(video_name)
    folder_name = video_name[:-4]
    try:
        download_video(video_name)
        split_video_frames(video_name)
        remove_blurry_images(folder_name)
        remove_duplicates(folder_name)
        detect_file(folder_name)
        upload_frames_from_folder(folder_name)
    finally:
        # Best-effort cleanup: either path may not exist if an early step failed.
        shutil.rmtree(folder_name, ignore_errors=True)
        if os.path.isfile(video_name):
            os.remove(video_name)

# Analyze Entire Bucket

In [19]:
def clean_entire_bucket():
    """Run ``clean_video`` on every top-level object in the current bucket.

    ``clean_video`` uploads its frames and annotation files back into the same
    bucket under ``<video name without extension>/``. Objects whose name
    contains ``/`` are therefore skipped, so previously generated frames are
    not treated as videos. The listing is materialized before processing
    starts so objects uploaded during the run cannot enter the iteration.
    """
    video_names = [
        blob.name
        for blob in storage_client.list_blobs(location)
        if "/" not in blob.name
    ]
    for video_name in video_names:
        clean_video(video_name)

# FastAPI Deployment

In [20]:
app = FastAPI()


# Startup hook: begin a Weights & Biases run named after the current bucket.
# (Plain comments are used instead of docstrings on the handlers so the
# generated OpenAPI descriptions stay exactly as they were.)
@app.on_event("startup")
def start_wandb():
    init_wandb(location)
    return {"message": f"Weights and Balances Started as project: {wandb_project}"}


# Landing page that points the caller at the interactive `/docs` UI.
@app.get("/")
def index():
    return {
        "message": "This is the homepage of the model, add '/docs' to the end of the URL "
                   "to access FastAPI to make predictions with the model"
    }


# Switch the bucket that subsequent requests operate on.
@app.get("/set_gcp_location")
def set_gcp_location(string_input):
    set_folder_location(str(string_input))
    return {"message": f"GCP Location Set to: {location}"}


# Clean a single video from the current bucket.
# NOTE(review): `clean_video` is blocking work inside an `async def` handler,
# so the event loop is busy until it finishes.
# NOTE(review): the frames are uploaded under the video name minus its last
# four characters, not under the full name reported in this message.
@app.get("/clean_single_video")
async def single_clean(string_input):
    video_name = str(string_input)
    clean_video(video_name)
    return {"message": f"Video: {video_name} cleaned and uploaded to gs://{location}/{video_name}"}


# Clean every video in the current bucket (same blocking caveat as above).
@app.get("/clean_bucket")
async def full_clean():
    clean_entire_bucket()
    return {"message": f"Bucket: {location} cleaned and uploaded to gs://{location}"}

# Main Function

In [None]:
# Patch asyncio via nest_asyncio before starting the server from inside the
# notebook (presumably so uvicorn's loop can run within the kernel's
# already-running event loop; confirm against the nest_asyncio docs).
nest_asyncio.apply()
# Force a fresh Weights & Biases login; the recorded output below shows this
# prompting for an API key.
wandb.login(relogin=True)
# Serve the FastAPI app on all network interfaces, port 8000. The recorded
# output shows the server running until interrupted (CTRL+C).
uvicorn.run(app, host='0.0.0.0', port=8000)

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


[34m[1mwandb[0m: Paste an API key from your profile and hit enter:  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/jupyter/.netrc
INFO:     Started server process [29949]
INFO:     Waiting for application startup.
[34m[1mwandb[0m: Currently logged in as: [33mjamesysato[0m (use `wandb login --relogin` to force relogin)


INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
