In [2]:
import warnings

In [125]:
import pandas as pd
from Katna.video import Video
from Katna.writer import KeyFrameDiskWriter
import os
from pathlib import Path
from tqdm import tqdm
from multiprocessing import Pool
import math
import logging

In [114]:
# Read metadata
meta_df = pd.read_csv("../../data_collection/data/final_videos_metadata.csv",
                      lineterminator='\n', index_col=0)
channel_df = pd.read_csv("../../data_collection/data/channel_metadata.csv")

In [126]:
logging.basicConfig(filename='keyframe.log', level=logging.DEBUG, 
                    format='%(asctime)s %(levelname)s %(name)s %(message)s')

In [130]:
error_ids = []

In [135]:
def extract_keyframes(video_path: str, output_dir: str) -> None:

    vd = Video()

    Path(output_dir).mkdir(parents=True, exist_ok=True)

    diskwriter = KeyFrameDiskWriter(location=output_dir)

    # Calculate number of frames based on length of video
    # 10 frames per minute
    video_id = output_dir.split('/')[-1]
    l = meta_df.loc[meta_df["id"] == video_id, "length"]
    n_frames = max(10, math.ceil((l/60)*10))
    try:
        vd.extract_video_keyframes(
            no_of_frames = n_frames, file_path = str(video_path),
            writer = diskwriter)
    except Exception as e:
        logging.error(f"Error extracting keyframes for {video_path}: {e}")
        error_ids.append(video_id)

In [139]:
def extract_channel(channel_path: Path, n_workers: int, channel:str, country: str) -> None:
    """
    Extracts keyframes from all videos in a channel
    in parallel using multiprocessing
    """

    pbar = tqdm(zip(channel_path.glob("*.mp4"), [f"out/{country}/{channel}/{video.name[:-8]}" for video in channel_path.glob("*.mp4")]))


    # Non parallel version
    for video_path, output_dir in pbar:
        extract_keyframes(video_path, output_dir)
        pbar.set_description(f"Extracting keyframes for video {output_dir}")



    #with Pool(n_workers) as p:
    #    p.starmap(extract_keyframes, zip(videos, 
    #                                     [f"out/{country}/{channel}/{video.name[:-8]}" for video in videos]))

In [140]:
def extract_all(videos_dir:str, country: str = None, n_workers: int = 20) -> None:
    """
    Extracts keyframes from all videos in all channels
    """


    channel_dirs = [p for p in Path(videos_dir).glob("*") if p.is_dir()]
    #print(channel_dirs)
    if country is not None:
        country_channels = channel_df.query(f"country == '{country}'")["channel"].tolist()
        channel_dirs = [p for p in channel_dirs if p.name in country_channels]

    pbar = tqdm(channel_dirs)
    for channel_dir in pbar:
        pbar.set_description(f"Extracting keyframes from channel {channel_dir.name}")
        extract_channel(channel_dir, n_workers, channel_dir.name, country)

In [141]:
extract_all("../Videos", country="sweden", n_workers=20)



Completed processing for :  ../Videos/studiolararforbundetstuden2879/hhcilMZdKhs-135.mp4




Completed processing for :  ../Videos/studiolararforbundetstuden2879/mw9QZV5lkyQ-135.mp4




Completed processing for :  ../Videos/studiolararforbundetstuden2879/f-1_l08aOB4-135.mp4




Completed processing for :  ../Videos/studiolararforbundetstuden2879/NpwGKwHdoVI-135.mp4


Extracting keyframes for video out/sweden/studiolararforbundetstuden2879/5RjR7xienuE: : 5it [01:17, 15.50s/it]
Extracting keyframes from channel Transportfacket:   1%|          | 1/88 [01:17<1:52:23, 77.52s/it]               

Completed processing for :  ../Videos/studiolararforbundetstuden2879/5RjR7xienuE-135.mp4




Completed processing for :  ../Videos/Transportfacket/ERKZ_GCQtWU-135.mp4




Completed processing for :  ../Videos/Transportfacket/ihK_ktkXgb4-135.mp4




Completed processing for :  ../Videos/Transportfacket/yLSJ1Rmm2_M-137.mp4




Completed processing for :  ../Videos/Transportfacket/jKRkHxx04ok-137.mp4




Completed processing for :  ../Videos/Transportfacket/uwHOIJjRJBE-299.mp4




Completed processing for :  ../Videos/Transportfacket/sVonDXmssfw-136.mp4




Completed processing for :  ../Videos/Transportfacket/upGlhQHNLxU-137.mp4




Completed processing for :  ../Videos/Transportfacket/Z6UeN77IuyA-137.mp4




Completed processing for :  ../Videos/Transportfacket/67Zh49BBKx8-137.mp4




Completed processing for :  ../Videos/Transportfacket/4oCyLJdBKAk-137.mp4




Completed processing for :  ../Videos/Transportfacket/4mhIQYf1LUo-137.mp4




Completed processing for :  ../Videos/Transportfacket/PsAHCeNRFSg-137.mp4




Completed processing for :  ../Videos/Transportfacket/J_Idj-US4-E-137.mp4




Completed processing for :  ../Videos/Transportfacket/-W90HuZXR8g-299.mp4




Completed processing for :  ../Videos/Transportfacket/pJ8reKOzeuM-137.mp4




Completed processing for :  ../Videos/Transportfacket/Gq_lDmGZWZE-137.mp4




Completed processing for :  ../Videos/Transportfacket/Ecr86HlIPQ4-137.mp4




Completed processing for :  ../Videos/Transportfacket/2uhYfYTEl6o-137.mp4




Completed processing for :  ../Videos/Transportfacket/pAgECneme4M-137.mp4




Completed processing for :  ../Videos/Transportfacket/e6HlDBAX1T0-137.mp4




Completed processing for :  ../Videos/Transportfacket/scQZ1KWAW_g-299.mp4




Completed processing for :  ../Videos/Transportfacket/_8NwO1ge2SY-137.mp4




Completed processing for :  ../Videos/Transportfacket/UTMP7WwgDEc-137.mp4




Completed processing for :  ../Videos/Transportfacket/lEckUO2XFxk-137.mp4




Completed processing for :  ../Videos/Transportfacket/BJuXqZUHMoA-137.mp4




Completed processing for :  ../Videos/Transportfacket/kldhJmNUobg-135.mp4




Completed processing for :  ../Videos/Transportfacket/PZtkf4EmSfY-137.mp4




Completed processing for :  ../Videos/Transportfacket/fYbLX6vS45g-137.mp4




Completed processing for :  ../Videos/Transportfacket/YkC3VkCohPQ-137.mp4




Completed processing for :  ../Videos/Transportfacket/TI892bRvjFA-137.mp4




Completed processing for :  ../Videos/Transportfacket/r7onA5hS9d8-137.mp4




Completed processing for :  ../Videos/Transportfacket/hH-Z9t1AxrI-137.mp4




Completed processing for :  ../Videos/Transportfacket/Fkn60_xkLcQ-137.mp4




Completed processing for :  ../Videos/Transportfacket/h6qYgHcTtbY-137.mp4




Completed processing for :  ../Videos/Transportfacket/6AEW-GuDvgM-137.mp4




Completed processing for :  ../Videos/Transportfacket/rtx0B90sOBw-137.mp4




Completed processing for :  ../Videos/Transportfacket/FyRASXMro8g-137.mp4




Completed processing for :  ../Videos/Transportfacket/EEp2CcuIONo-137.mp4




Completed processing for :  ../Videos/Transportfacket/XejTgVdm10s-137.mp4




Completed processing for :  ../Videos/Transportfacket/vMqiGLkyJ-U-135.mp4




Completed processing for :  ../Videos/Transportfacket/Xxfglh0D8Ts-134.mp4




Completed processing for :  ../Videos/Transportfacket/uhCEJkAbFlE-137.mp4




Completed processing for :  ../Videos/Transportfacket/lu4pWI8X_rY-299.mp4


[h264 @ 0x5952300] mmco: unref short failure
[h264 @ 0x9af85c0] mmco: unref short failure


Completed processing for :  ../Videos/Transportfacket/vKi88amC0IU-135.mp4




Completed processing for :  ../Videos/Transportfacket/xZKYHA8GZ4I-137.mp4




Completed processing for :  ../Videos/Transportfacket/4IRbJrYEAe0-135.mp4




Completed processing for :  ../Videos/Transportfacket/PjYt9zEK5BA-137.mp4




Completed processing for :  ../Videos/Transportfacket/Stp2fdw0IjA-137.mp4




Completed processing for :  ../Videos/Transportfacket/-Vn_tfRebOU-135.mp4




Completed processing for :  ../Videos/Transportfacket/fPqPie9Htis-137.mp4




Completed processing for :  ../Videos/Transportfacket/nOeOrjG-TTI-137.mp4




Completed processing for :  ../Videos/Transportfacket/tcmZJII1T48-137.mp4




Completed processing for :  ../Videos/Transportfacket/GtNvgTdTzqo-137.mp4




Completed processing for :  ../Videos/Transportfacket/cAAXvPolKew-135.mp4




Completed processing for :  ../Videos/Transportfacket/cx5nODG40PM-137.mp4




Completed processing for :  ../Videos/Transportfacket/7FcMsBQ2h_w-137.mp4




Completed processing for :  ../Videos/Transportfacket/MQg5vhJbEzE-137.mp4




Completed processing for :  ../Videos/Transportfacket/NPjYV8RVs6E-137.mp4




Completed processing for :  ../Videos/Transportfacket/EBDEuDj4KgU-135.mp4




Completed processing for :  ../Videos/Transportfacket/QcDeG1tAYWw-137.mp4




Completed processing for :  ../Videos/Transportfacket/vtKvcG6NLlE-137.mp4




Completed processing for :  ../Videos/Transportfacket/p2YoFe257OA-137.mp4




Completed processing for :  ../Videos/Transportfacket/Ms9BjuBfhk8-135.mp4




Completed processing for :  ../Videos/Transportfacket/JbpHbLTsLd4-137.mp4




Completed processing for :  ../Videos/Transportfacket/RWsypNfsyFk-137.mp4




Completed processing for :  ../Videos/Transportfacket/JjQXhuKjRkM-137.mp4




Completed processing for :  ../Videos/Transportfacket/Brllu-v71Hk-137.mp4




Completed processing for :  ../Videos/Transportfacket/rP_NfgAKl6Q-137.mp4




Completed processing for :  ../Videos/Transportfacket/4b3nQIzVZ3g-137.mp4




Completed processing for :  ../Videos/Transportfacket/0X71VXMDoCY-299.mp4




Completed processing for :  ../Videos/Transportfacket/wIV6Dl8wjZc-135.mp4




Completed processing for :  ../Videos/Transportfacket/MBohtfFXYlA-137.mp4




Completed processing for :  ../Videos/Transportfacket/J-WRTLlWQxY-136.mp4


[h264 @ 0x8c79400] mmco: unref short failure
[h264 @ 0xa4d5f00] mmco: unref short failure


Completed processing for :  ../Videos/Transportfacket/zPJDL1AyjEk-137.mp4




Completed processing for :  ../Videos/Transportfacket/nrI38eqQ5u0-137.mp4




Completed processing for :  ../Videos/Transportfacket/fpWb4-N2ATM-299.mp4




Completed processing for :  ../Videos/Transportfacket/ELGKpQ0zPCs-137.mp4




Completed processing for :  ../Videos/Transportfacket/7WaigadyEVE-134.mp4




Completed processing for :  ../Videos/Transportfacket/57JIzggUY58-137.mp4




Completed processing for :  ../Videos/Transportfacket/QBnA8Ji4GtE-134.mp4




Completed processing for :  ../Videos/Transportfacket/pwaDqT-VKMM-137.mp4




Completed processing for :  ../Videos/Transportfacket/huNungCY_WU-299.mp4




Completed processing for :  ../Videos/Transportfacket/4n48s9MFeGM-135.mp4




Completed processing for :  ../Videos/Transportfacket/4ZUn0LUE_RA-135.mp4


Extracting keyframes for video out/sweden/Transportfacket/fQwaEe5au-k: : 85it [1:40:50, 71.18s/it]

Completed processing for :  ../Videos/Transportfacket/fQwaEe5au-k-135.mp4





Completed processing for :  ../Videos/lararforbundethalmstad4100/ta7UNxG0qck-135.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/jNsUJxyMGoU-137.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/6ncrxRnKMCU-137.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/iX16bzmix2I-133.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/iify0BNtmGM-136.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/ISIiRFTaINk-135.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/aRL6UnMSoQQ-135.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/6lPtPfVQuas-134.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/LEblJM4T_uY-135.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/YY396Ph6S6I-135.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/sGOf7QxpUS0-134.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/llxeV4KGdaA-135.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/HfdDodSyF00-135.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/P7AHxtmeEvQ-135.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/xdRVAC4Nw9k-137.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/O_mHlDhSYRA-135.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/mLN0sotRP7M-135.mp4




Completed processing for :  ../Videos/lararforbundethalmstad4100/qOhtbZl_tZY-135.mp4


Extracting keyframes for video out/sweden/lararforbundethalmstad4100/PYTrjvoyq8g: : 19it [13:45, 43.45s/it]
Extracting keyframes from channel DIKforbundet:   3%|▎         | 3/88 [1:56:02<55:04:05, 2332.29s/it]              

Completed processing for :  ../Videos/lararforbundethalmstad4100/PYTrjvoyq8g-136.mp4




Completed processing for :  ../Videos/DIKforbundet/FHHm5w_0vaU-135.mp4




Completed processing for :  ../Videos/DIKforbundet/AAHMZ3uhgQg-134.mp4




Completed processing for :  ../Videos/DIKforbundet/SjePLkWCzMU-135.mp4


Extracting keyframes for video out/sweden/DIKforbundet/-P98UVhv08s: : 4it [02:32, 38.15s/it]
Extracting keyframes from channel Teknologvalet:   5%|▍         | 4/88 [1:58:34<34:20:29, 1471.78s/it]

Completed processing for :  ../Videos/DIKforbundet/-P98UVhv08s-135.mp4




Completed processing for :  ../Videos/Teknologvalet/0ssazSQeFjc-136.mp4




Completed processing for :  ../Videos/Teknologvalet/wb0DjNqS7tQ-136.mp4




Completed processing for :  ../Videos/Teknologvalet/K91V5O1duH0-136.mp4




Completed processing for :  ../Videos/Teknologvalet/kXo_eueZ9DQ-136.mp4


Extracting keyframes for video out/sweden/Teknologvalet/14IHk-bk_AE: : 5it [03:56, 47.24s/it]
Extracting keyframes from channel VisionNorrkoping:   6%|▌         | 5/88 [2:02:31<23:39:36, 1026.22s/it]

Completed processing for :  ../Videos/Teknologvalet/14IHk-bk_AE-136.mp4




Completed processing for :  ../Videos/VisionNorrkoping/lSYRPM6_A2g-137.mp4




Completed processing for :  ../Videos/VisionNorrkoping/X5YWRbCQcvc-137.mp4


Extracting keyframes for video out/sweden/VisionNorrkoping/mepp7ABn-Ec: : 3it [04:25, 88.61s/it]
Extracting keyframes from channel lararforbundetkungalv6704:   7%|▋         | 6/88 [2:06:56<17:29:10, 767.69s/it]

Completed processing for :  ../Videos/VisionNorrkoping/mepp7ABn-Ec-137.mp4




Completed processing for :  ../Videos/lararforbundetkungalv6704/YuD8CXsQ_GU-137.mp4




Completed processing for :  ../Videos/lararforbundetkungalv6704/tm_DMKIpLCM-136.mp4


Exception ignored in: <function _releaseLock at 0x7f0803064c10>
Traceback (most recent call last):
  File "/zpool/beast-mirror/labour-movements-mobilisation-via-visual-means/katrin_env/lib/python3.10/logging/__init__.py", line 228, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 
