In [None]:
# hw_location: where you want to run inference
#     "@cloud" to use DeGirum cloud
#     "@local" to run on local machine
#     IP address for AI server inference
# model_zoo_url: url/path for model zoo
#     cloud_zoo_url: valid for @cloud, @local, and ai server inference options
#     '': ai server serving models from local folder
#     path to json file: single model zoo in case of @local inference
# encoder_model_name: name of the encoder model to load from the model zoo
# decoder_model_name: name of the decoder model to load from the model zoo
#
# The video to analyze is not configured here; it is set through
# `video_source` in a later cell.

hw_location = "@local"
model_zoo_url = "https://cs.degirum.com/degirum/timm_gender_model_test"
encoder_model_name = "encoder_action_recognition--224x224_float_openvino_cpu_1"
decoder_model_name = "decoder_action_recognition--224x224_float_openvino_cpu_1"


In [None]:
import degirum as dg
import degirum_tools

# Connect to the model zoo at `model_zoo_url`, targeting `hw_location` for
# inference and passing the token returned by degirum_tools.get_token().
action_rec_zoo = dg.connect(hw_location, model_zoo_url, degirum_tools.get_token())

In [None]:
from preprocessor_action_rec_encoder import ActionRecEncoderPreprocessor
from postprocessor_action_rec_decoder import ActionRecDecoderPostprocessor
# Encoder: loaded as-is; its input frames are prepared later with
# ActionRecEncoderPreprocessor.
encoder_model = action_rec_zoo.load_model(encoder_model_name)

# Decoder: loaded with a custom postprocessor class that produces the
# results consumed by the overlay code further down.
decoder_model = action_rec_zoo.load_model(
    decoder_model_name,
    custom_postprocessor=ActionRecDecoderPostprocessor,
)

In [None]:
# The two trailing dimensions used to reshape the decoder input to
# (1, dec_w, dec_h).
# NOTE(review): dec_h is taken from InputC rather than InputH; presumably this
# matches how the decoder's 3-D input is described in its model info - confirm.
dec_w = decoder_model.model_info.InputW[0]
dec_h = decoder_model.model_info.InputC[0]

In [None]:
# Video to run action recognition on; it is opened later with
# degirum_tools.open_video_stream().
video_source = "https://archive.org/serve/ISSVideoResourceLifeOnStation720p/ISS%20Video%20Resource_LifeOnStation_720p.mp4"  

In [None]:
import numpy as np
import cv2
def image_overlay(frame, result_de):
    """Draw the label of every result onto `frame`, one text line per result.

    Args:
        frame: image handed to cv2.putText; the text is drawn directly on it.
        result_de: iterable of mappings, each providing a 'label' string.

    Returns:
        None.
    """
    left_margin, first_baseline, line_height = 10, 30, 30
    text_scale = 0.8
    white = (255, 255, 255)  # White color in BGR
    stroke = 2

    row = 0
    for entry in result_de:
        # Each label goes one line below the previous one.
        origin = (left_margin, first_baseline + row * line_height)
        cv2.putText(frame, entry['label'], origin, cv2.FONT_HERSHEY_SIMPLEX, text_scale, white, stroke)
        row += 1


In [None]:
# Run the encoder on every second frame, feed each group of `sample_duration`
# encoder outputs to the decoder, and write the frames annotated with the
# decoder's labels to `video_results_path`.
video_results_path = "action_rec_output.mp4"
num_frames = 100  # Maximum number of frames to read from video, set to 0 for all frames.
sample_duration = 16  # Number of encoder outputs collected for one decoder call.
skip_first_frames = 600  # Number of leading frames that are discarded.
# NOTE(review): the encoder input size is read from InputC; presumably this is
# the spatial size given how the model info describes the input - confirm.
size = encoder_model.model_info.InputC[0]
with degirum_tools.open_video_stream(video_source) as video_stream:
    w, h, fps = degirum_tools.get_video_stream_properties(video_stream)
    fps = 30  # The output is always written at 30 fps, overriding the stream value.
    if num_frames == 0:
        # OpenCV reports the frame count as a float; use an integer count.
        # NOTE(review): this count includes the skipped frames, so the progress
        # total overstates the number of processed frames in this mode.
        total_frames = int(video_stream.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        total_frames = num_frames
    encoder_output = []  # Encoder outputs waiting for the next decoder call.
    counter = 0  # Number of frames processed after the skipped ones.
    frames = []  # Frames buffered until labels are available for them.
    result_de = None  # Most recent decoder result, if any.
    # NOTE(review): the writer is opened with the stream's w/h while frames
    # larger than 1280 px are downscaled below - confirm the writer copes with
    # frames of a different size.
    with degirum_tools.open_video_writer(video_results_path, w, h, fps) as writer:
        progress = degirum_tools.Progress(total_frames)

        for i, frame in enumerate(degirum_tools.video_source(video_stream)):
            if i < skip_first_frames:
                continue
            if i == skip_first_frames + total_frames:
                break
            counter = counter + 1

            scale = 1280 / max(frame.shape)
            # Adaptive resize for visualization.
            if scale < 1:
                frame = cv2.resize(frame, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

            # Only every second processed frame is passed through the encoder.
            if counter % 2 == 0:
                preprocessed = ActionRecEncoderPreprocessor(size).preprocess_frame_for_encoder(frame)
                encoder_output.append(encoder_model(preprocessed).results[0]["data"])

                if len(encoder_output) == sample_duration:
                    decoder_input = np.concatenate(encoder_output, axis=0)
                    # Organize input shape vector to the Decoder (shape: [1x16x512])
                    decoder_input = decoder_input.transpose((2, 3, 0, 1))
                    decoder_input = decoder_input.reshape((1, dec_w, dec_h))
                    decoder_input = decoder_input.astype(np.float32)
                    result_de = decoder_model(decoder_input)
                    # A distinct loop variable is required here: reusing `frame`
                    # would replace the current frame with the last buffered
                    # one before it is appended below.
                    for buffered_frame in frames:
                        image_overlay(buffered_frame, result_de.results)
                        writer.write(buffered_frame)
                    encoder_output = []
                    frames = []
            frames.append(frame)
            progress.step()

        # Write the frames still buffered when the video ends so the output is
        # not truncated; they are annotated with the most recent labels, or
        # left unannotated if the decoder never ran.
        for buffered_frame in frames:
            if result_de is not None:
                image_overlay(buffered_frame, result_de.results)
            writer.write(buffered_frame)
        frames = []
