# This notebook demonstrates how to load datasets into EVA

## Launch EVA DB
Run the command `python eva.py` on the server where you want to deploy EVA.

In [1]:
import cv2
import json
import os
import sys

import nest_asyncio
import pandas as pd

# eva lib
sys.path.insert(0,'..')
from src.server.db_api import connect

## Establish connection with EVA

In [2]:
# hostname and port of the server where EVADB is running
EVA_HOST = '0.0.0.0'
EVA_PORT = 5432

# nest_asyncio permits nested use of the asyncio event loop
# (a notebook kernel is already running one).
nest_asyncio.apply()

connection = connect(host=EVA_HOST, port=EVA_PORT)
cursor = connection.cursor()

## Utility functions

In [3]:
def create_table(table_name):
    """
    Sends the CREATE TABLE statement for the dataset table and reports the outcome.

    NOTE: the statement is fixed for now -- it always targets the table `BDD`
    with a hard-coded schema, and `table_name` is not used in the query.
    TODO: Make this function more flexible to receive column name, type etc..

    Args:
        table_name (string) - name of table (currently unused, see NOTE)

    Returns:
        True if the status of the server response equals '0', False otherwise.
    """

    # Make this query dynamic later
    ddl_statement = """ 

    CREATE TABLE IF NOT EXISTS BDD (
        id INTEGER UNIQUE,
        frame_id INTEGER,
        video_id INTEGER,
        frame_data NDARRAY UINT8(3, ANYDIM, ANYDIM),
        labels NDARRAY STR(ANYDIM),
        bboxes NDARRAY FLOAT32(ANYDIM, 4),
        object_ids NDARRAY FLOAT32(ANYDIM)
    );

    """

    # Uses the module-level `cursor` opened in the connection cell.
    cursor.execute(ddl_statement)
    result = cursor.fetch_all()

    return bool(result.status == '0')

In [4]:
def parse_video_info(info_path, total_frames):
    """
    Parses a given info json file into a lookup keyed by video frame id.
    total_frames is required to know which entry in the json corresponds to which frame id of video

    The entries are assumed to be equally spaced over the video, e.g. with 100 total
    frames and 10 entries we have info for every 10th frame (sample_frequency = 10).

    Args:
        info_path (string) - path to the info file
        total_frames (int) - total frames of the current video

    Returns:
        dict mapping frame id (int) to a dict with the keys
            'labels' - list of object categories
            'bboxes' - list of boxes, each as [[x1, y1], [x2, y2]]
            'ids'    - list of object ids
        An empty dict is returned when the info file contains no entries.

    Raises:
        KeyError if an entry has no 'frameIndex', or a label has no
        'category', 'box2d' or 'id'.
    """

    print(f"parsing: {info_path}")
    with open(info_path, 'rb') as json_file:
        info_json = json.load(json_file)

    # Nothing annotated: return early instead of dividing by zero below.
    if not info_json:
        return {}

    # number of frames for which we have information
    num_frames_info = len(info_json)

    # spacing (in video frames) between two consecutive annotated frames
    sample_frequency = total_frames / num_frames_info

    # attach all info to this object, where key is the frame id
    video_info = {}

    for info in info_json:
        frame_id = int(int(info['frameIndex']) * sample_frequency)

        # An entry without labels (missing or null) describes a frame with no objects.
        labels = info.get('labels') or []

        bounding_boxes = []
        for label in labels:
            box = label['box2d']
            bounding_boxes.append([[box['x1'], box['y1']], [box['x2'], box['y2']]])

        video_info[frame_id] = {
            'labels': [label['category'] for label in labels],
            'bboxes': bounding_boxes,
            'ids': [label['id'] for label in labels],
        }

    return video_info

## Utility function to load a single frame along with its info

In [5]:
def insert_frame(dataset_name, frame, info):
    """
    Placeholder for inserting a single frame and its info into the dataset table.
    No query is executed yet; the function does nothing and returns None.

    Args:
        dataset_name (string) - name of the dataset this video belongs to. There should be table existing with this name
        frame (ndarray) - frame to be inserted into the table
        info (json) - info for this frame
    """

    # TODO: Not able to get a working query. Experimenting with it in the last cell of this notebook
    return None

## Utility function to load a video along with its info

In [6]:
def load_video(dataset_name, video_path, info_path):
    """
    Takes the path to 1 video and its corresponding json file.
    Iterates over every frame of the video (starting with the very first one, which
    gets frame id 0), draws the annotated boxes and labels on frames that have info,
    and displays each frame. Pressing 'q' in the display window stops early.

    The INSERT into the table is not wired up yet (see the TODOs below).

    Args:
        dataset_name (string) - name of the dataset this video belongs to. There should be table existing with this name
        video_path (string) - path of the video to be loaded
        info_path (string) - path of the json file that contains info about the video

    Returns:
        The last frame that was read and displayed (with any annotations drawn on it),
        or None if no frame could be read. (Debug aid -- remove this later.)
    """

    print(f"Loading video: {video_path}")

    # cap object
    video_cap = cv2.VideoCapture(video_path)
    last_frame = None

    try:
        total_frames = video_cap.get(cv2.CAP_PROP_FRAME_COUNT)

        # get video info in a parsed format
        video_info = parse_video_info(info_path, total_frames)

        # set output config
        color = (0, 255, 0)
        thickness = 3

        # keep track of frame index
        frame_id = 0

        while True:
            # Read inside the loop only: a separate read before the loop would
            # silently drop the first frame and shift every frame id by one.
            ret, frame = video_cap.read()
            if not ret:
                break

            frame_info = video_info.get(frame_id)
            if frame_info is not None:
                # then we have info for this frame
                for bbox, label in zip(frame_info['bboxes'], frame_info['labels']):
                    (x1, y1), (x2, y2) = bbox
                    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
                    frame = cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness) # object bbox
                    cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, thickness-1) # object label

                # TODO: call insert_frame() with the corresponding frame info
            # else:
            #     TODO call insert_frame() with the empty video info

            last_frame = frame

            # Display the resulting frame
            cv2.imshow('Frame', frame)

            # Press Q on keyboard to  exit
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break

            frame_id += 1
    finally:
        # always free the capture and close the display window, even on errors
        video_cap.release()
        cv2.destroyAllWindows()

    # return last frame (remove this)
    return last_frame

In [7]:
def load_dataset(dataset_name):
    """
    A folder named dataset_name is expected to be inside datasets. This folder should contain 2 other folders named info and videos

    Every file under videos must have an info file with the same base name and a
    .json extension under info (e.g. videos/abc.mp4 <-> info/abc.json). Each video is
    loaded together with exactly that file; other files under info are ignored.

    Args:
        dataset_name (string) - name of the dataset

    Returns:
        True if all videos have been loaded succesfully
        False if the table could not be created or a video has no matching info file
        (in which case no video is loaded)
    """

    root = "../"

    # dataset_name must be your folder name
    dataset_path = os.path.join(root, 'datasets', dataset_name)

    print(f"Loading {dataset_name} from the path {dataset_path}")

    if create_table(dataset_name):
        print(f"Table created successfully for {dataset_name}")
    else:
        return False

    # videos contains the raw videos
    videos_path = os.path.join(dataset_path, 'videos')

    # info contains a json file corresponding to each video
    info_dir = os.path.join(dataset_path, 'info')
    available_info = set(os.listdir(info_dir))

    # Pair every video with the info file of the same base name. Pairing by name
    # (rather than by position in two sorted listings) keeps the pairs correct even
    # if the info folder holds additional files.
    video_info_pairs = []
    for video_name in sorted(os.listdir(videos_path)):
        # splitext strips only the final extension, so dotted names stay intact
        base_name = os.path.splitext(video_name)[0]
        info_name = base_name + '.json'
        if info_name not in available_info:
            print("Each video under videos should have a corresponding info file under info.")
            return False
        video_info_pairs.append(
            (os.path.join(videos_path, video_name), os.path.join(info_dir, info_name))
        )

    for video_path, info_path in video_info_pairs:
        # load this video
        load_video(dataset_name, video_path, info_path)

    return True

## Load a dataset

In [8]:
# Name of the dataset folder under ../datasets; load_dataset() returns True once every video was loaded
dataset_name = "bdd_test"

load_dataset(dataset_name)

Loading bdd_test from the path ../datasets/bdd_test
Table created successfully for bdd_test
Loading video: ../datasets/bdd_test/videos/00a04f65-8c891f94.mp4
parsing: ../datasets/bdd_test/info/00a04f65-8c891f94.json
Loading video: ../datasets/bdd_test/videos/00a04f65-af2ab984.mp4
parsing: ../datasets/bdd_test/info/00a04f65-af2ab984.json
Loading video: ../datasets/bdd_test/videos/00a0f008-3c67908e.mp4
parsing: ../datasets/bdd_test/info/00a0f008-3c67908e.json
Loading video: ../datasets/bdd_test/videos/00a0f008-a315437f.mp4
parsing: ../datasets/bdd_test/info/00a0f008-a315437f.json
Loading video: ../datasets/bdd_test/videos/00a2e3ca-5c856cde.mp4
parsing: ../datasets/bdd_test/info/00a2e3ca-5c856cde.json
Loading video: ../datasets/bdd_test/videos/00a2e3ca-62992459.mp4
parsing: ../datasets/bdd_test/info/00a2e3ca-62992459.json
Loading video: ../datasets/bdd_test/videos/00a2f5b6-d4217a96.mp4
parsing: ../datasets/bdd_test/info/00a2f5b6-d4217a96.json
Loading video: ../datasets/bdd_test/videos/00a3

True

## Experimenting with frame inserts

In [9]:
# read a sample frame
video_path = '../datasets/bdd_test/videos/00a04f65-8c891f94.mp4'
video_cap = cv2.VideoCapture(video_path)
try:
    ret, frame = video_cap.read()
finally:
    # only one frame is needed, so free the capture right away
    video_cap.release()

# fail loudly instead of interpolating `None` into the query below
if not ret:
    raise RuntimeError(f"Could not read a frame from {video_path}")

# sample values for one row
frame_id_ = 1
video_id_ = 1
frame_data_ = frame
labels_ = ['car', 'car']
bboxes_ = [[[1, 2], [3, 4]], [[1, 2], [3, 4]]]
object_ids_ = [0, 0]

# NOTE: formatting the ndarray into the f-string uses numpy's printed form, which
# abbreviates large arrays with '...' -- so the query text does not carry the full
# pixel data. This is part of what is still being experimented with.
insert_frame_query = f"INSERT INTO BDD (frame_id, video_id, frame_data, labels, bboxes, object_ids) VALUES ({frame_id_}, {video_id_}, {frame_data_}, {labels_}, {bboxes_}, {object_ids_});"

print(f"insert frame query: {insert_frame_query}")

insert frame query: INSERT INTO BDD (frame_id, video_id, frame_data, labels, bboxes, object_ids) VALUES (1, 1, [[[33 39 43]
  [32 38 42]
  [29 35 39]
  ...
  [24 21 28]
  [23 20 27]
  [29 26 33]]

 [[39 45 49]
  [38 44 48]
  [34 40 44]
  ...
  [23 20 27]
  [24 21 28]
  [31 28 35]]

 [[40 46 50]
  [38 44 48]
  [34 40 44]
  ...
  [22 19 26]
  [26 23 30]
  [37 34 41]]

 ...

 [[10 11  6]
  [10 11  6]
  [10 11  6]
  ...
  [12 12 12]
  [12 12 12]
  [16 16 16]]

 [[14 11  5]
  [14 11  5]
  [14 11  5]
  ...
  [13 13 13]
  [10 10 10]
  [ 9  9  9]]

 [[20 17 11]
  [20 17 11]
  [19 16 10]
  ...
  [16 16 16]
  [ 9  9  9]
  [ 5  5  5]]], ['car', 'car'], [[[1, 2], [3, 4]], [[1, 2], [3, 4]]], [0, 0]);


In [10]:
# Send the experimental INSERT built in the previous cell and print the server's response
cursor.execute(insert_frame_query)
response = cursor.fetch_all()
print(response)

Response Object:
@status: -1
@batch: Batch Object:
@dataframe:                      error
0  list index out of range
@batch_size: 1
@identifier_column: id
@metrics: None
