In [1]:
import os
import subprocess

def process_videos_in_folder(folder_path):
    print()
    for file_name in os.listdir((folder_path)):
        if file_name.startswith("filtered_"):
            print(f"El video {file_name} ya fue procesado")
            continue
        if file_name.endswith(".mp4"):
            input_path = os.path.join(folder_path, file_name)
            output_path = os.path.join(folder_path, f"filtered_{file_name}")
            
            # Comando ffmpeg
            command = [
                "ffmpeg", 
                "-i", input_path, 
                "-vf", r"select=not(mod(n\,3))", 
                "-vsync", "vfr", 
                "-c:v", "libx264", 
                output_path
            ]
            try:
                subprocess.run(command, check=True)
                print(f"Video procesado: {file_name}")
                os.remove(input_path)
            except subprocess.CalledProcessError as e:
                print(f"Error procesando el video {file_name}: {e}")

In [2]:
import sys

def create_base_path(inContainer: bool):
    if inContainer:
        base_path = "/" + os.getcwd().split("/")
    else:
        base_path = os.getcwd().split("\\")
    return base_path
base_path = create_base_path(inContainer=False)
project_directory = r""
for part in base_path:
    if part != "Modules":
        project_directory += part
        project_directory += r"/"
modules_directory = r"Modules"

modules_path = project_directory + modules_directory

sys.path.append(modules_path)
print(project_directory)
print(os.listdir(project_directory))

c:/Users/48113164/Documents/GitHub/SignAI-ML/AI-Module/
['BERT', 'CNN-LSTM', 'Extra', 'main.py', 'Modules', 'Resources', 'Tests', 'Transformer', 'ViT']


In [3]:
#root_vids = project_directory + "Resources/Videos/raw_videos"
root_vids = r"C:\Users\48113164\Documents\raw_videos"
root_translation_csv = project_directory + "Resources/Translations"

In [4]:
def obtain_paths(root, extension):
    paths = []

    for filename in os.listdir(root):
        if filename.endswith(extension):
            paths.append(os.path.join(root, filename))
    return paths

In [5]:
import KeyFrameExtractorClass
import Points2VecClass
import cv2
import VideoFormaterClass
import pandas as pd

In [6]:
vids = obtain_paths(root_vids, '.mp4')[:700]
translation_csv = obtain_paths(root_translation_csv, '.csv')[1]
print("vids:" + str(len(vids)))
print(vids[:5])
print("translation_csv: " + str(translation_csv))

vids:700
['C:\\Users\\48113164\\Documents\\raw_videos\\-3bw4dQEyF8-5-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1dJbYUmWpSY-8-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1dLchY8R6tU-8-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1dpRaxOTfZs-8-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1DSsHgAToGY-5-rgb_front.mp4']
translation_csv: c:/Users/48113164/Documents/GitHub/SignAI-ML/AI-Module/Resources/Translations\how2sign_train.csv


In [7]:
def make_df_video(video_paths, csv_path, type):
    kfe = KeyFrameExtractorClass.KeyFrameExtractor()
    videoFormater = VideoFormaterClass.VideoFormater()
    normalizer = Points2VecClass.Point2Vec(4)
    translationDf = videoFormater.csvToTranslationDf(csv_path)
    if not os.path.exists(project_directory + f"Resources/Datasets/{type}"):
        os.mkdir(project_directory + f"Resources/Datasets/{type}")
    for index, path in enumerate(video_paths):
        if "filtered_" not in path:
            print(f"skipped: {path}")
            continue
        print("Processing video: ", index)
        video = cv2.VideoCapture(path)
        file_name = path.split(os.path.sep)[-1].split(".mp4")[0].split("filtered_")[1]
        if (file_name + ".csv") in os.listdir(project_directory + f"Resources/Datasets/{type}"):
            print(f"El video {index} ya fue procesado")
            continue
        translation_filter = translationDf["VIDEO_NAME"] == file_name
        translation = translationDf[translation_filter]["SENTENCE"].iloc[0]
        video_points = kfe.extractKeyFrames(return_frame=False, draw=False, video=video)
        landmarks = normalizer.land2vec(video_points)
        cant_keyframes = len(video_points)
        dict = {"points": landmarks, "translation": translation, "id": index+500, "len_keyframes": cant_keyframes}
        dataFrame = videoFormater.formatVideo(dict)
        videoFormater.concatAndExportVideos(dataFrame, project_directory + f"Resources/Datasets/{type}/{file_name}.csv")
    return project_directory + f"Resources/Datasets/{type}"

In [8]:
def processVideos(paths, csv_path, folder_name, root_vids):
    print(paths, csv_path, folder_name)
    process_videos_in_folder(root_vids)
    csvs_path = make_df_video(paths, csv_path, folder_name)
    return csvs_path

In [9]:
csvs_path = processVideos(vids, translation_csv, "videos", root_vids)

['C:\\Users\\48113164\\Documents\\raw_videos\\-3bw4dQEyF8-5-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1dJbYUmWpSY-8-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1dLchY8R6tU-8-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1dpRaxOTfZs-8-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1DSsHgAToGY-5-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1E1vnnehwdE-5-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1e2wDf5R_zk-8-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1E4aT0mJ-V4-5-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1EaPhQ-7CsA-5-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1EC-cM1w4sQ-5-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1eCzLIfdH2o-8-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1ed8Dw0Kou8-8-rgb_front.mp4', 'C:\\Users\\48113164\\Documents\\raw_videos\\1EEgqE9W4k8-3-rgb_front.mp4', 'C:\\Users\\48113164\\Do

In [19]:
def joincsvs(path):
    csvs = obtain_paths(path, '.csv')
    final_df = pd.DataFrame()
    for index, csv in enumerate(csvs):
        print("Processing csv: ", index)
        for chunk in pd.read_csv(csv, chunksize=10000):
            final_df = pd.concat([final_df, chunk], ignore_index=True)
    videoFormater = VideoFormaterClass.VideoFormater()
    videoFormater.concatAndExportVideos(final_df, project_directory + "Resources/Datasets/how2sign.csv")

In [None]:
csvs_path = project_directory + f"Resources/Datasets/videos"
joincsvs(csvs_path)