In [1]:
import os

import cv2
import pandas as pd

In [2]:
# Directory holding the downloaded clips, one "<youtube_id>.mp4" file per video.
video_folder = "videos"
# Annotation table: one row per clip (label, youtube_id, time_start, time_end, split).
csv_file = "new_df.csv"

In [3]:
# Load the clip annotations from the path set in the configuration cell
# (csv_file) instead of repeating the file name here.
data = pd.read_csv(csv_file)
data

Unnamed: 0,label,youtube_id,time_start,time_end,split
0,tap dancing,GcluCxjiSjI,30,40,train
1,tap dancing,5FuYP_l6J-s,4,14,train
2,tap dancing,5AAYAhVcZNM,61,71,train
3,tap dancing,fPZb1IB-U_k,19,29,train
4,tap dancing,k9edXWpg64E,38,48,train
...,...,...,...,...,...
613,tango dancing,hC7lESvyJt8,82,92,train
614,tango dancing,Pgl0MeU6Xqs,13,23,train
615,tango dancing,UyQ5vHj0fNU,107,117,train
616,tango dancing,S2sn-1HF2LY,3,13,train


In [4]:
# Hold out 5 clips per label as the test set.
# Every label is sampled with the same fixed seed, so the selection is
# reproducible; the per-label samples are collected first and concatenated
# once instead of growing a DataFrame inside the loop.
label_values = data['label'].unique()
label_samples = [
    data[data['label'] == label].sample(5, random_state=2024)
    for label in label_values
]
# pd.concat refuses an empty list, so fall back to an empty frame when the
# annotation table has no labels at all.
test_data = pd.concat(label_samples) if label_samples else pd.DataFrame()

In [5]:
# Inspect the held-out sample (5 rows drawn per label).
test_data

Unnamed: 0,label,youtube_id,time_start,time_end,split
25,tap dancing,BheDJtlAYpA,0,10,train
16,tap dancing,3hII9QR7sag,51,61,train
22,tap dancing,EgM_PTRnV2k,5,15,train
29,tap dancing,iT088BHtz5Y,15,25,train
33,tap dancing,3gaE6kCFXMg,180,190,train
...,...,...,...,...,...
593,tango dancing,bSKe45ZLGr8,113,123,train
607,tango dancing,qSohMCcGHUY,98,108,train
586,tango dancing,w_Z8QaP3r64,4,14,train
577,tango dancing,Y2t7bV5E8kc,25,35,train


In [6]:
# Keep only the clips whose video was not drawn into the test sample, so the
# same video never contributes frames to both sets.
held_out_ids = test_data['youtube_id']
data = data.loc[~data['youtube_id'].isin(held_out_ids)]

In [7]:
# Inspect the rows that remain after removing the videos present in test_data.
data

Unnamed: 0,label,youtube_id,time_start,time_end,split
0,tap dancing,GcluCxjiSjI,30,40,train
1,tap dancing,5FuYP_l6J-s,4,14,train
2,tap dancing,5AAYAhVcZNM,61,71,train
3,tap dancing,fPZb1IB-U_k,19,29,train
4,tap dancing,k9edXWpg64E,38,48,train
...,...,...,...,...,...
613,tango dancing,hC7lESvyJt8,82,92,train
614,tango dancing,Pgl0MeU6Xqs,13,23,train
615,tango dancing,UyQ5vHj0fNU,107,117,train
616,tango dancing,S2sn-1HF2LY,3,13,train


In [8]:
def extractor(data, frames_folder, step_seconds=.5):
    """Save still frames from every clip listed in ``data`` and index them.

    For each row the file ``{video_folder}/{youtube_id}.mp4`` is opened
    (``video_folder`` is the notebook-level setting) and one frame is grabbed
    every ``step_seconds`` seconds, starting at ``time_start`` and continuing
    while the position is ``<= time_end``. Each frame is written to
    ``{frames_folder}/{row index}_{frame number}.jpg``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``youtube_id``, ``label``, ``time_start``
        and ``time_end`` (times in seconds).
    frames_folder : str
        Directory that receives the JPEG files; created if it does not exist.
    step_seconds : float, default 0.5
        Distance in seconds between two saved frames. Must be positive.

    Returns
    -------
    pandas.DataFrame
        One row per frame that was actually written, with the columns
        ``video``, ``frame_path`` and ``label``. The columns are present even
        when no frame could be extracted.

    Raises
    ------
    ValueError
        If ``step_seconds`` is not positive (the sampling loop would never end).
    """
    if step_seconds <= 0:
        raise ValueError("step_seconds must be positive")

    # cv2.imwrite does not create missing directories; it only returns False.
    os.makedirs(frames_folder, exist_ok=True)

    # Collect plain records and build the DataFrame once at the end instead of
    # concatenating a one-row frame per saved image.
    records = []
    for index, row in data.iterrows():
        # Full path to the video file.
        video_path = f"{video_folder}/{row['youtube_id']}.mp4"

        video = cv2.VideoCapture(video_path)
        try:
            if not video.isOpened():
                # Missing or unreadable file: nothing to extract for this row.
                continue

            # Jump to the start of the annotated segment.
            video.set(cv2.CAP_PROP_POS_MSEC, row['time_start'] * 1000)
            fps = video.get(cv2.CAP_PROP_FPS)

            frame_count = 0
            current_time = row['time_start']
            while current_time <= row['time_end']:
                ret, frame = video.read()
                if not ret:
                    break

                frame_path = f"{frames_folder}/{index}_{frame_count}.jpg"
                # Only index frames that really reached the disk.
                if cv2.imwrite(frame_path, frame):
                    records.append({
                        'video': row['youtube_id'],
                        'frame_path': frame_path,
                        'label': row['label'],
                    })

                # The frame number advances even if the write failed, so file
                # names keep matching their position inside the segment.
                frame_count += 1
                current_time += step_seconds
                # Seek to the next sampling position, expressed as a frame index.
                video.set(cv2.CAP_PROP_POS_FRAMES, current_time * fps)
        finally:
            # Release the capture even when reading or writing raises.
            video.release()

    return pd.DataFrame(records, columns=['video', 'frame_path', 'label'])

In [9]:
# cv2.imwrite does not create missing directories (it just returns False), so
# make sure the output folder exists before the frames are extracted.
os.makedirs('train', exist_ok=True)
train_frames_df = extractor(data, 'train')
train_frames_df

Unnamed: 0,video,frame_path,label
0,GcluCxjiSjI,train/0_0.jpg,tap dancing
1,GcluCxjiSjI,train/0_1.jpg,tap dancing
2,GcluCxjiSjI,train/0_2.jpg,tap dancing
3,GcluCxjiSjI,train/0_3.jpg,tap dancing
4,GcluCxjiSjI,train/0_4.jpg,tap dancing
...,...,...,...
11167,LT-e_wj6d9w,train/617_16.jpg,tango dancing
11168,LT-e_wj6d9w,train/617_17.jpg,tango dancing
11169,LT-e_wj6d9w,train/617_18.jpg,tango dancing
11170,LT-e_wj6d9w,train/617_19.jpg,tango dancing


In [10]:
# cv2.imwrite does not create missing directories (it just returns False), so
# make sure the output folder exists before the frames are extracted.
os.makedirs('test', exist_ok=True)
test_frames_df = extractor(test_data, 'test')
test_frames_df

Unnamed: 0,video,frame_path,label
0,BheDJtlAYpA,test/25_0.jpg,tap dancing
1,BheDJtlAYpA,test/25_1.jpg,tap dancing
2,BheDJtlAYpA,test/25_2.jpg,tap dancing
3,BheDJtlAYpA,test/25_3.jpg,tap dancing
4,BheDJtlAYpA,test/25_4.jpg,tap dancing
...,...,...,...
1522,F9Ehijaqdl0,test/587_16.jpg,tango dancing
1523,F9Ehijaqdl0,test/587_17.jpg,tango dancing
1524,F9Ehijaqdl0,test/587_18.jpg,tango dancing
1525,F9Ehijaqdl0,test/587_19.jpg,tango dancing


In [11]:
# Create the "_20" output folders (a no-op when they already exist).
# NOTE(review): the extraction cells in this notebook write to 'train' and
# 'test'; nothing visible here writes into these two folders - confirm which
# pair is intended.
for out_dir in ("train_20", "test_20"):
    os.makedirs(out_dir, exist_ok=True)

In [12]:
# Persist both frame indexes (video, frame_path, label) without the row index.
for frames_index, csv_name in (
    (train_frames_df, 'train_frames_20.csv'),
    (test_frames_df, 'test_frames_20.csv'),
):
    frames_index.to_csv(csv_name, index=False)