In [None]:
import os, shutil, re
from moviepy.video.io.VideoFileClip import VideoFileClip

In [None]:
# Show the kernel's working directory; the relative paths used in this notebook resolve against it.
os.getcwd()

In [None]:
# Root directory whose 'metadata' sub-tree is searched for .jpg frames.
path = '.'

In [None]:
# Gather every .jpg found anywhere beneath <path>/metadata, in os.walk order.
jpegs_src = [
    os.path.join(root, name)
    for root, _subdirs, names in os.walk(os.path.join(path, 'metadata'))
    for name in names
    if name.endswith('.jpg')
]

In [None]:
# Mirror each source path from <path>/metadata into <path>/frames.
# Only the root folder is swapped: the path relative to the metadata root is
# re-attached under the frames root, so a sub-directory or file name that happens
# to contain the text 'metadata' is left untouched.
metadata_root = os.path.join(path, 'metadata')
frames_root = os.path.join(path, 'frames')
jpegs_dst = [
    os.path.join(frames_root, os.path.relpath(src, metadata_root))
    for src in jpegs_src
]

In [None]:
# Dry run: list every planned "source --> destination" pair without touching disk.
# To actually relocate the files, create the parent folder and move each one:
#     os.makedirs(os.path.dirname(target), exist_ok=True)
#     shutil.move(origin, target)
for origin, target in zip(jpegs_src, jpegs_dst):
    print(origin, '-->', target)

In [None]:
# Directory listed for input videos, and directory that receives the extracted audio files.
video_path = './video/video'
audio_path = './audio'

In [None]:
def extract_audio(video_path, output_path):
    """Write a video's audio track to `output_path` as 16 kHz, 16-bit PCM, mono audio.

    Args:
        video_path: Path of the video file to read.
        output_path: Path of the audio file to write.

    Raises:
        ValueError: If the video contains no audio track.
    """
    with VideoFileClip(video_path) as video:
        # moviepy leaves `audio` as None for clips without an audio stream; fail
        # with a clear message instead of an AttributeError on None.
        if video.audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")
        video.audio.write_audiofile(
            output_path,
            fps=16000,  # sample rate in Hz
            nbytes=2,  # 2 bytes per sample -> 16-bit
            codec='pcm_s16le',  # signed 16-bit little-endian PCM
            ffmpeg_params=["-ac", "1"],  # force a single (mono) channel
        )

In [None]:
# Convert every .mp4 in video_path into a same-named .wav inside audio_path.
os.makedirs(audio_path, exist_ok=True)
for filename in sorted(os.listdir(video_path)):
    stem, ext = os.path.splitext(filename)
    # Skip entries that are not .mp4 files (hidden files, other formats, ...):
    # a plain str.replace('.mp4', '.wav') leaves such names unchanged, so the
    # output would not get a .wav name, and it also rewrites '.mp4' occurring
    # in the middle of a name.
    if ext.lower() != '.mp4':
        continue
    extract_audio(os.path.join(video_path, filename), os.path.join(audio_path, stem + '.wav'))

In [None]:
# Collect one sample id per annotation file: '<dirname>/<stem>' for every
# metadata/<dirname>/<stem>.json. Listings are sorted so the result does not
# depend on the arbitrary order returned by os.listdir.
samples = []
for dirname in sorted(os.listdir('metadata')):
    sample_dir = os.path.join('metadata', dirname)
    if not os.path.isdir(sample_dir):
        continue
    for filename in sorted(os.listdir(sample_dir)):
        if filename.endswith('.json'):
            # Strip only the trailing extension; split('.json')[0] would cut the
            # name at the first '.json' occurring anywhere in it.
            samples.append(os.path.join(dirname, filename[:-len('.json')]))

In [None]:
# Alias: each sample id ('<dirname>/<stem>') is also the metadata file path without its '.json' suffix.
metadata_files = samples

In [None]:
# List the audio directory once, rather than once per sample.
available_wavs = set(os.listdir('audio'))
# Clip ids (the directory part of each sample id) that have an extracted .wav.
# os.path.dirname is used because the ids were built with os.path.join, so the
# separator is not guaranteed to be '/'.
audio_files = {
    clip_id
    for clip_id in (os.path.dirname(file) for file in metadata_files)
    if clip_id + '.wav' in available_wavs
}

In [None]:
# Number of distinct clip ids for which a .wav file was found.
len(audio_files)

In [None]:
# Samples whose frame image exists at frames/<dirname>/<stem>.jpg.
# os.listdir('frames') only returns top-level entry names, so a nested
# '<dirname>/<stem>.jpg' string can never be a member of it; test for the file
# on disk instead.
image_files = {
    file
    for file in metadata_files
    if os.path.isfile(os.path.join('frames', file + '.jpg'))
}

In [None]:
# Number of samples that passed the frame-image check.
len(image_files)

In [None]:
import json

In [None]:
# Parse every metadata file; json_obj ends up holding the last one for inspection.
# The `with` block closes each handle instead of leaving it to the garbage
# collector, and the encoding is pinned so parsing does not depend on the locale.
for file in metadata_files:
    with open(os.path.join('metadata', file + '.json'), encoding='utf-8') as fh:
        json_obj = json.load(fh)

In [None]:
# Display the metadata object parsed last by the preceding loop.
json_obj

In [None]:
# Map each sample id to the full parsed content of its metadata JSON.
# An explicit loop is used so every file is closed right after it is read.
ground_truth = {}
for file in metadata_files:
    with open(os.path.join('metadata', file + '.json'), encoding='utf-8') as fh:
        ground_truth[file] = json.load(fh)

In [None]:
# Map each sample id to just the 'annotations' entry of its metadata JSON.
# An explicit loop is used so every file is closed right after it is read.
ground_truth = {}
for file in metadata_files:
    with open(os.path.join('metadata', file + '.json'), encoding='utf-8') as fh:
        ground_truth[file] = json.load(fh)['annotations']

In [None]:
# Inspect the ground-truth entry stored for one specific sample id.
ground_truth['l_7B1wnTNtA_00020/00254']

In [None]:
# Report the sample ids whose ground-truth entry has more than one element.
for sample_id in (k for k, entries in ground_truth.items() if len(entries) > 1):
    print(sample_id)

In [None]:
# Display the full list of collected sample ids.
samples

In [None]:
import numpy as np
import cv2
import torch
import torch.nn.functional as F

In [None]:
# Per sample, keep only the image size and the annotations from its metadata JSON.
ground_truths = {}
for file in metadata_files:
    # Close each file as soon as it is parsed instead of leaking the handle.
    with open(os.path.join('metadata', file + '.json'), encoding='utf-8') as fh:
        gt = json.load(fh)
    ground_truths[file] = {k: gt[k] for k in ('original_width', 'original_height', 'annotations')}

In [None]:
# Print the ids of samples whose record holds more than one annotation.
multi_annotated = [
    name
    for name, record in ground_truths.items()
    if len(record['annotations']) > 1
]
for name in multi_annotated:
    print(name)

In [None]:
def convert_ann_to_mask(ann, height: int, width: int):
    """Rasterise an annotation's polygon segmentation into a binary mask.

    Args:
        ann: Annotation mapping whose "segmentation" entry is an iterable of
            polygons; each polygon must reshape to (N, 2) points, in the
            (x, y) order expected by cv2.fillPoly.
        height: Mask height in pixels.
        width: Mask width in pixels.

    Returns:
        A (height, width) uint8 array holding 1 inside any polygon and 0 elsewhere.
    """
    mask = np.zeros((height, width), dtype=np.uint8)
    for polygon in ann["segmentation"]:
        # cv2.fillPoly only accepts 32-bit integer points; a plain `int` cast
        # yields int64 on most platforms, which OpenCV rejects.
        points = np.asarray(polygon).reshape(-1, 2).astype(np.int32)
        cv2.fillPoly(mask, [points], 1)
    return mask

In [None]:
# Placeholder batch of three 244x244 all-zero maps.
heatmap = torch.zeros(3, 244, 244)
# Ground-truth records of three hand-picked samples, one per batch slot.
gt = [
    ground_truths[sample_id]
    for sample_id in (
        'GueOh9nkQ38_00131/00296',
        'XdlIbNrki5o_00136/00238',
        'l_7B1wnTNtA_00020/00254',
    )
]

In [None]:
# Build one binary target map per sample at the heatmap's spatial resolution.
target = torch.zeros_like(heatmap)
for b, g in enumerate(gt):
    height, width = g['original_height'], g['original_width']
    # Accumulate the per-annotation masks at the original image resolution.
    mask = torch.zeros((height, width))
    for ann in g['annotations']:
        mask += torch.tensor(convert_ann_to_mask(ann, height, width))
    # F.interpolate expects (N, C, H, W) input, hence the two temporary leading dims.
    mask = F.interpolate(mask[None, None], size=heatmap.shape[-2:], mode='bilinear', align_corners=False).squeeze()
    # NOTE(review): after bilinear resizing, `>= 1` keeps only fully covered
    # pixels; confirm that a softer threshold (e.g. 0.5) is not intended.
    target[b] = mask >= 1
# Tensor.to returns a new tensor rather than moving in place, so keep the result.
target = target.to(heatmap.device)
target

In [None]:
# Preview the third target map in a native OpenCV window.
preview = target[2].numpy()
cv2.imshow('Grayscale Image', preview)
cv2.waitKey(0)  # 0 = block until a key is pressed
cv2.destroyAllWindows()