In [None]:
import os
os.chdir("/root/dev/vcmr/experimental")
from yt_dlp import YoutubeDL
import subprocess
from PIL import Image
import torch
import clip
import numpy as np
import sys
from argparse import Namespace
import ray
import pandas as pd
import ray

HOME_DIR = "/root/dev/vcmr"

# Initialize Ray exactly once. `ignore_reinit_error=True` keeps a re-run of
# this cell from raising when a local Ray instance is already up.
ray.init(ignore_reinit_error=True)

# Table that provides the `Url_youtube` column consumed by the driver cell.
# The path is relative, so it depends on the working directory set above.
df = pd.read_csv("../Spotify_Youtube.csv")

# Make the lp-music-caps captioning package importable. The membership guard
# avoids piling up duplicate sys.path entries when the cell is re-executed.
LPMC_CAPTIONING_DIR = f"{HOME_DIR}/lp-music-caps/lpmc/music_captioning"
if LPMC_CAPTIONING_DIR not in sys.path:
    sys.path.append(LPMC_CAPTIONING_DIR)

# yt-dlp options: resolve stream URLs only (no download), one video per URL.
# Format selector: <=360p video + best audio, else best combined <=360p,
# else the worst available format.
ydl_opts = {
    'format': 'bestvideo[height<=360]+bestaudio/best[height<=360]/worst',
    'quiet': True,
    'noplaylist': True,
    'skip_download': True,
}

# Three 10-second audio windows per video; `frame_time` is the midpoint of
# each window (start + 5 s) and is where the still frame is grabbed.
segments = [
    {
        'start': '00:00:30',
        'duration': '10',
        'segment_num': 1,
        'frame_time': '00:00:35',
    },
    {
        'start': '00:01:20',
        'duration': '10',
        'segment_num': 2,
        'frame_time': '00:01:25',
    },
    {
        'start': '00:02:10',
        'duration': '10',
        'segment_num': 3,
        'frame_time': '00:02:15',
    },
]


# Load CLIP on the GPU when one is visible, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
# Inference mode; the trailing semicolon suppresses the (very long) model repr
# that would otherwise be rendered as the cell output.
model.eval();


2024-12-05 14:25:25,222	INFO worker.py:1821 -- Started a local Ray instance.
2024-12-05 14:25:26,482	INFO worker.py:1654 -- Calling ray.init() again after it has already been called.


CLIP(
  (visual): VisionTransformer(
    (conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
    (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (transformer): Transformer(
      (resblocks): Sequential(
        (0): ResidualAttentionBlock(
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          )
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): Sequential(
            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
            (gelu): QuickGELU()
            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        )
        (1): ResidualAttentionBlock(
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          

In [None]:
# Segment files are written with "./"-relative paths, so pin the working
# directory to the experimental folder before defining the worker.
os.chdir("/root/dev/vcmr/experimental")



# Sample video URL, handy for trying the worker on a single video.
# NOTE(review): no visible cell reads this variable (the worker's `url`
# parameter shadows it) — confirm whether it is still needed.
url = "https://www.youtube.com/watch?v=ZncbtRo7RXs"

@ray.remote(num_gpus=1)
def process_url(url, segments, model, preprocess, device):
    """Extract per-segment media for one YouTube URL and embed its frames.

    For every entry in ``segments`` an audio clip (``./<id>_<n>.mp3``) and a
    single video frame (``./<id>_<n>.jpg``) are cut with ffmpeg directly from
    the stream URLs resolved by yt-dlp. Each frame is then encoded with the
    supplied CLIP ``model``.

    Args:
        url: YouTube watch URL; the text after the last ``=`` is used as the
            video id in the output file names.
        segments: Iterable of dicts with the keys ``start``, ``duration``,
            ``segment_num`` and ``frame_time``.
        model: Object exposing ``encode_image`` (the CLIP model).
        preprocess: Callable turning a PIL image into a model input tensor.
        device: Torch device string the input tensor is moved to.

    Returns:
        ``(image_embeddings, audio_embeddings)`` on success, where
        ``image_embeddings`` holds one NumPy array per segment and
        ``audio_embeddings`` stays empty until the captioning stage below is
        re-enabled. ``(None, None)`` if any step fails; the error is printed
        to the worker log instead of being swallowed silently.
    """
    image_embeddings = []
    audio_embeddings = []
    try:
        video_id = url.split('=')[-1]

        print("fetch video info")
        with YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=False)

        # A merged selection exposes separate video/audio streams; anything
        # else (including a one-element list) falls back to the single URL.
        requested_formats = info_dict.get('requested_formats') or []
        if len(requested_formats) >= 2:
            video_url = requested_formats[0]['url']
            audio_url = requested_formats[1]['url']
        else:
            video_url = audio_url = info_dict['url']

        print("download segments")
        tags = []
        for segment in segments:
            tag = f"{video_id}_{segment['segment_num']}"
            tags.append(tag)
            audio_output = f"./{tag}.mp3"
            image_output = f"./{tag}.jpg"
            audio_cmd = ['ffmpeg', '-y', '-ss', segment['start'], '-t', segment['duration'], '-i',
                         audio_url, '-vn', '-acodec', 'libmp3lame', '-ar', '44100', '-ac', '2', audio_output]
            image_cmd = ['ffmpeg', '-y', '-ss', segment['frame_time'], '-i', video_url,
                         '-frames:v', '1', image_output]
            # check=True surfaces ffmpeg failures here instead of letting a
            # stale file from an earlier run be picked up further down.
            for cmd in (audio_cmd, image_cmd):
                subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                               text=True, check=True)

        print("processing images")
        for tag in tags:
            # The context manager closes the file handle once the tensor exists.
            with Image.open(f"./{tag}.jpg") as image:
                image_input = preprocess(image).unsqueeze(0).to(device)
            # Inference only: skip autograd bookkeeping and hand back host
            # arrays so the result can be shipped to the driver cheaply.
            with torch.no_grad():
                image_features = model.encode_image(image_input)
            image_embeddings.append(image_features.cpu().numpy())

        # TODO: audio captioning stage, currently disabled.
        # print("processing text")
        # for tag in tags:
        #     os.chdir("/root/dev/vcmr/lp-music-caps/lpmc/music_captioning")
        #     args.audio_path = f"/root/dev/vcmr/experimental/{tag}.mp3"
        #     inference = evaluate(captioner, args)
        #     os.chdir("/root/dev/vcmr/experimental")
        #     text = clip.tokenize(inference[0], truncate=True).to(device)
        #     with torch.no_grad():
        #         audio_features = model.encode_text(text).cpu().numpy()
        #     audio_embeddings.append(audio_features)

        # Same 2-tuple shape as the failure sentinel below.
        return image_embeddings, audio_embeddings
    except Exception as e:
        # Best-effort per URL: report the cause, then signal failure.
        print(f"process_url failed for {url}: {e!r}")
        return None, None


In [None]:
def main(urls, max_concurrent_tasks=10):
    """Run ``process_url`` over ``urls`` with a bounded number of in-flight tasks.

    Args:
        urls: Iterable of video URLs to process.
        max_concurrent_tasks: Upper bound on submitted-but-unfinished Ray
            tasks before the loop blocks on ``ray.wait``.

    Returns:
        List of successful task results, in completion order (not input
        order). Failed tasks are dropped: an empty/``None`` result or a
        tuple consisting only of ``None`` values counts as a failure.
    """
    results = []
    futures = []

    def _keep(result):
        # `(None, None)` is truthy, so a plain truthiness test would let the
        # worker's failure sentinel through; inspect the items as well.
        if not result:
            return False
        return any(item is not None for item in result)

    for url in urls:
        futures.append(process_url.remote(url, segments, model, preprocess, device))

        # Back-pressure: once the window is full, wait for one task to finish
        # before submitting the next.
        if len(futures) >= max_concurrent_tasks:
            done, futures = ray.wait(futures, num_returns=1)
            for result in ray.get(done):
                if _keep(result):
                    results.append(result)

    # Drain whatever is still running after the last submission.
    for result in ray.get(futures):
        if _keep(result):
            results.append(result)

    return results


# Cap on how many rows of the table are processed in one run.
MAX_URLS = 1000

urls = list(df["Url_youtube"])[:MAX_URLS]
results = main(urls)
# Show only the count; the embeddings themselves are too large to render.
len(results)



[36m(process_url pid=3333258)[0m fetch video info
[36m(process_url pid=3333255)[0m download segments
[36m(process_url pid=3333262)[0m fetch video info[32m [repeated 7x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)[0m
[36m(process_url pid=3333258)[0m processing images
[36m(process_url pid=3333256)[0m download segments[32m [repeated 7x across cluster][0m
[36m(process_url pid=3333267)[0m fetch video info[32m [repeated 3x across cluster][0m
[36m(process_url pid=3333256)[0m processing images[32m [repeated 7x across cluster][0m
[36m(process_url pid=3333282)[0m download segments[32m [repeated 4x across cluster][0m
[36m(process_url pid=3333271)[0m fetch video info[32m [repeated 5x across cluster][0m
[36m(process_url pid=3333270)[0m processing images[32m [repeated 2x across cluster]



[36m(process_url pid=3333300)[0m download segments[32m [repeated 8x across cluster][0m
[36m(process_url pid=3333272)[0m fetch video info[32m [repeated 3x across cluster][0m
[36m(process_url pid=3333283)[0m processing images[32m [repeated 4x across cluster][0m
[36m(process_url pid=3333278)[0m download segments[32m [repeated 2x across cluster][0m
[36m(process_url pid=3333297)[0m fetch video info[32m [repeated 7x across cluster][0m
[36m(process_url pid=3333272)[0m processing images[32m [repeated 5x across cluster][0m
[36m(process_url pid=3333297)[0m download segments[32m [repeated 6x across cluster][0m
[36m(process_url pid=3333290)[0m fetch video info[32m [repeated 2x across cluster][0m
[36m(process_url pid=3333297)[0m processing images[32m [repeated 6x across cluster][0m
[36m(process_url pid=3333308)[0m download segments[32m [repeated 5x across cluster][0m
[36m(process_url pid=3333265)[0m fetch video info[32m [repeated 5x across cluster][0m
[3

[36m(process_url pid=3368324)[0m ERROR: [youtube] AG6H5i1x5A0: Video unavailable


[36m(process_url pid=3368791)[0m download segments[32m [repeated 2x across cluster][0m
[36m(process_url pid=3368791)[0m fetch video info[32m [repeated 2x across cluster][0m
[36m(process_url pid=3367246)[0m processing images[32m [repeated 5x across cluster][0m




[36m(process_url pid=3371114)[0m download segments[32m [repeated 6x across cluster][0m
[36m(process_url pid=3371778)[0m fetch video info[32m [repeated 7x across cluster][0m
[36m(process_url pid=3368791)[0m processing images
[36m(process_url pid=3371476)[0m processing images
[36m(process_url pid=3372359)[0m download segments[32m [repeated 2x across cluster][0m
[36m(process_url pid=3372359)[0m fetch video info
[36m(process_url pid=3372590)[0m fetch video info
[36m(process_url pid=3372359)[0m processing images[32m [repeated 6x across cluster][0m
[36m(process_url pid=3375089)[0m download segments[32m [repeated 3x across cluster][0m
[36m(process_url pid=3376256)[0m fetch video info[32m [repeated 7x across cluster][0m
[36m(process_url pid=3374780)[0m processing images[32m [repeated 2x across cluster][0m
[36m(process_url pid=3376256)[0m download segments[32m [repeated 5x across cluster][0m
[36m(process_url pid=3376632)[0m fetch video info
[36m(proces



[36m(process_url pid=3386748)[0m download segments[32m [repeated 4x across cluster][0m
[36m(process_url pid=3385151)[0m processing images[32m [repeated 2x across cluster][0m
[36m(process_url pid=3388785)[0m fetch video info[32m [repeated 7x across cluster][0m
[36m(process_url pid=3387437)[0m download segments[32m [repeated 4x across cluster][0m
[36m(process_url pid=3388785)[0m processing images[32m [repeated 5x across cluster][0m
[36m(process_url pid=3390748)[0m fetch video info[32m [repeated 2x across cluster][0m
[36m(process_url pid=3390894)[0m download segments[32m [repeated 4x across cluster][0m
[36m(process_url pid=3390894)[0m processing images[32m [repeated 2x across cluster][0m
[36m(process_url pid=3392610)[0m fetch video info[32m [repeated 5x across cluster][0m
[36m(process_url pid=3392609)[0m download segments[32m [repeated 4x across cluster][0m
[36m(process_url pid=3392610)[0m processing images[32m [repeated 6x across cluster][0m
[

KeyboardInterrupt: 

In [None]:
%pip install ray



Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting ray
  Downloading ray-2.40.0-cp310-cp310-manylinux2014_x86_64.whl.metadata (17 kB)
Downloading ray-2.40.0-cp310-cp310-manylinux2014_x86_64.whl (66.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.8/66.8 MB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: ray
Successfully installed ray-2.40.0
[0mNote: you may need to restart the kernel to use updated packages.


KeyError: 'Youtube_url'

['https://www.youtube.com/watch?v=HyHNuVaZJ-k',
 'https://www.youtube.com/watch?v=yYDmaexVHic',
 'https://www.youtube.com/watch?v=qJa-VFwPpYA',
 'https://www.youtube.com/watch?v=04mfKJWDSzI',
 'https://www.youtube.com/watch?v=1V_xRb0x9aw',
 'https://www.youtube.com/watch?v=uAOR6ib95kQ',
 'https://www.youtube.com/watch?v=BONNm0F7Tto',
 'https://www.youtube.com/watch?v=f8NwLXYIHS4',
 'https://www.youtube.com/watch?v=S03T47hapAc',
 'https://www.youtube.com/watch?v=cLnkQAeMbIM',
 'https://www.youtube.com/watch?v=YlUKcNNmywk',
 'https://www.youtube.com/watch?v=GLvohMXgcBo',
 'https://www.youtube.com/watch?v=8DyziWtkfBw',
 'https://www.youtube.com/watch?v=mzJj5-lubeM',
 'https://www.youtube.com/watch?v=rn_YodiJO6k',
 'https://www.youtube.com/watch?v=yuFI5KSPAt4',
 'https://www.youtube.com/watch?v=Sb5aq5HcS1A',
 'https://www.youtube.com/watch?v=JnfyjwChuNU',
 'https://www.youtube.com/watch?v=Mr_uHJPUlO8',
 'https://www.youtube.com/watch?v=Q0oIoR9mLwc',
 'https://www.youtube.com/watch?v=5qm8PH