In [28]:
%cd /code/
import io
import openai
import os
import base64
from pathlib import Path

from decord import VideoReader
from PIL import Image
import numpy as np

from pydantic import BaseModel, Field

/code


In [29]:
class Base(BaseModel):
    # Structured-output schema for the model's verdict; it is passed as
    # `response_format` to the chat-completions `parse` call in this notebook.
    #
    # Documented with `#` comments on purpose: pydantic copies a class
    # docstring into the generated JSON schema as the model description,
    # which would change the schema that is sent with the request.

    # Score requested by the prompt: 0 = not anomalous, 1 = very anomalous.
    # The description previously said "input text", but the request supplies
    # a video frame, so the wording now matches what is actually scored.
    anomaly_score: float = Field(
        ...,
        title="Anomaly Score",
        description="Anomaly score of the input video frame, from 0 (not anomalous) to 1 (very anomalous)",
    )
    # Short free-text justification for the score above.
    explanation: str = Field(
        ...,
        title="Explanation",
        description="Explanation of the anomaly score",
    )

In [30]:
def get_frames(
    p_video,
    duration_sec = 1,
    max_frames_num = 32,
    fps = 30,
):
    """Yield uniformly sampled frames for each fixed-length segment of a video.

    The video is cut into consecutive, non-overlapping segments of
    ``int(duration_sec * fps)`` frames; a trailing partial segment is dropped.
    For every segment, ``max_frames_num`` indices are taken from an evenly
    spaced grid spanning the segment (the two grid endpoints are discarded).
    Grid values are truncated to integers, so when ``max_frames_num`` is close
    to or larger than the segment length the same frame index can appear more
    than once in a batch.

    This is a generator: nothing (including opening the video and argument
    validation) runs until the first item is requested.

    Args:
        p_video: Path of the video; converted with ``str`` before opening.
        duration_sec: Segment length in seconds.
        max_frames_num: Number of frame indices sampled per segment.
        fps: Frame rate used to convert ``duration_sec`` into a frame count.
            Defaults to 30, the value that used to be hard-coded. Pass
            ``None`` to use the rate reported by the reader's
            ``get_avg_fps()``.

    Yields:
        dict with keys ``frames`` (the decoded batch converted via
        ``asnumpy()``), ``segment_idx``, ``total_segments``,
        ``segment_start_idx`` and ``segment_end_idx`` (inclusive).

    Raises:
        ValueError: If ``duration_sec * fps`` is shorter than one frame.
    """
    vr = VideoReader(str(p_video))
    if fps is None:
        fps = vr.get_avg_fps()

    num_frames_segment = int(duration_sec * fps)
    if num_frames_segment < 1:
        # Previously this surfaced as an opaque ZeroDivisionError below.
        raise ValueError(
            f'duration_sec={duration_sec!r} at fps={fps!r} gives a segment of '
            f'{num_frames_segment} frames; a segment needs at least one frame'
        )

    total_frame_num = len(vr)
    num_segments = total_frame_num // num_frames_segment
    for segment_idx in range(num_segments):
        segment_start_idx = segment_idx * num_frames_segment
        segment_end_idx = segment_start_idx + num_frames_segment - 1
        # Build max_frames_num + 2 grid points and drop both endpoints so the
        # samples sit strictly inside the grid.
        uniform_sampled_frames = np.linspace(
            segment_start_idx, segment_end_idx, max_frames_num + 2, dtype=int
        )[1:-1]
        frame_idx = uniform_sampled_frames.tolist()
        frames = vr.get_batch(frame_idx).asnumpy()
        yield {
            'frames': frames,
            'segment_idx': segment_idx,
            'total_segments': num_segments,
            'segment_start_idx': segment_start_idx,
            'segment_end_idx': segment_end_idx,
        }


def frames_to_base64(frames):
    """Encode every frame as a PNG and return the images as base64 text.

    Args:
        frames: Iterable of arrays accepted by ``PIL.Image.fromarray``.

    Returns:
        A list with one UTF-8 base64 string per frame, in input order.
    """

    def _png_base64(frame):
        # Serialise a single frame to PNG in memory, then base64-encode it.
        buffer = io.BytesIO()
        Image.fromarray(frame).save(buffer, format='PNG')
        return base64.b64encode(buffer.getvalue()).decode('utf-8')

    return [_png_base64(frame) for frame in frames]


# Smoke test: take the first segment of one UCF_Crimes clip and base64-encode
# its sampled frames. `img_list` is left in the namespace for the request cell
# further down. The path is relative to the working directory set by `%cd`.
p_video = Path('data/UCF_Crimes/Videos/Fighting/Fighting003_x264.mp4')
# get_frames is a generator, so the video is not opened until iteration starts.
frames = get_frames(p_video)
# NOTE: each yielded item is a per-segment dict (frames plus index metadata),
# not a single frame, despite the loop variable's name.
for frame in frames:
    img_list = frames_to_base64(frame['frames'])
    # print(img_list)
    print(frame['segment_idx'], frame['total_segments'], frame['segment_start_idx'], frame['segment_end_idx'])
    # Only the first segment is needed for this check.
    break

0 103 0 29


In [32]:

# Alternative endpoint kept for reference:
# client = openai.Client(api_key='EMPTY', base_url='http://10.90.21.21:50001/v1')
client = openai.Client(api_key=os.getenv("OPENAI_API_KEY"))

# Only the first encoded frame of the segment is attached to the request.
image_part = {
    'type': 'image_url',
    'image_url': {'url': f'data:image/png;base64,{img_list[0]}'},
}
text_part = {
    'type': 'text',
    'text': 'How anomalous is this video? Please rate from 0 to 1 with 0 being not anomalous and 1 being very anomalous and provide an explanation in a few sentences in provided json format.',
}
messages = [
    {'role': 'system', 'content': 'You are a helpful assistant.'},
    {'role': 'user', 'content': [image_part, text_part]},
]

# `Base` is supplied as the response format so the reply follows that schema;
# the fixed seed is passed to make repeated runs more comparable.
request = client.beta.chat.completions.parse(
    model='gpt-4o',
    messages=messages,
    seed=1234,
    response_format=Base,
)

# Raw JSON text of the first choice, displayed as the cell's output.
response = request.choices[0].message.content
response

'{"anomaly_score":0.2,"explanation":"The video depicts a scene in a subway station with people behaving in a typical manner for such an environment. There is nothing clearly unusual, such as a lack of people, odd behavior, or strange objects in view that would increase the anomaly score."}'