<a href="https://colab.research.google.com/github/RockhoRockho/Data-project/blob/main/15_UCF11_DataSet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from glob import glob

In [None]:
# Mount Google Drive at /content/drive (Colab only).
# NOTE(review): later cells use relative 'drive/MyDrive/...' paths, which
# presumably relies on the working directory being /content — confirm.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Collect every clip matching <dataset root>/*/*/*.mpg.
# The tree is globbed once (the original walked it three times) and the result
# is sorted because glob() returns entries in arbitrary order; sorting makes
# file_paths[0] and every downstream table reproducible across runs.
file_paths = sorted(glob('drive/MyDrive/UCF11_updated_mpg/*/*/*.mpg'))
print(len(file_paths))

In [None]:
# Sanity check: show the first collected video path.
print(file_paths[0])

In [None]:
# Decode every frame of the first clip into memory as 256x256 RGB images.
cap = cv2.VideoCapture(file_paths[0])

frames = []
try:
    while True:
        ret, frame = cap.read()
        if not ret:  # end of stream, or the file could not be decoded
            break
        frame = cv2.resize(frame, (256, 256))
        # OpenCV decodes to BGR; matplotlib expects RGB.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(frame)
finally:
    # Release the decoder even if resizing/conversion raises.
    cap.release()

In [None]:
# Preview the first frames of the clip in a 10x3 grid.
arr = np.array(frames)

n_rows, n_cols = 10, 3
# Never index past the end of a short clip.
n_preview = min(n_rows * n_cols, len(arr))

plt.figure(figsize=(15, 15))
for k in range(n_preview):
    # Subplot positions are 1-based while frame indices are 0-based; the
    # original reused the 1-based number for both and so skipped frame 0.
    plt.subplot(n_rows, n_cols, k + 1)
    plt.imshow(arr[k])
plt.tight_layout()

In [None]:
# Clip length in seconds: number of decoded frames divided by a hard-coded
# frame rate of 29.97 fps.
len(frames) / 29.97

In [None]:
# Probe only the first clip: read its frame count from the container metadata
# and convert it to seconds at 29.97 fps.
for file_path in file_paths[:1]:
    cap = cv2.VideoCapture(file_path)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    print(file_path)
    print(length, 'frames')
    print(length / 29.97, 'sec')

In [None]:
import pandas as pd

# Build one row per clip: path, frame count, duration in seconds and class label.
FPS = 29.97  # frame rate assumed for every clip when converting frames to seconds

records = []
for file_path in file_paths:
    # Paths look like <root>/<label>/<group>/<clip>.mpg, so the label is the
    # third component from the end (independent of how deep <root> is).
    label = file_path.split('/')[-3]

    cap = cv2.VideoCapture(file_path)
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the decoder even if the metadata query raises.
        cap.release()

    records.append({
        'file_path': file_path,
        'frames': frame_count,
        'duration': frame_count / FPS,
        'label': label
    })

# Create the frame once from the collected records instead of growing it row by
# row; this is faster and yields numeric dtypes for 'frames' and 'duration'.
df = pd.DataFrame(records, columns=[
    'file_path', 'frames', 'duration',
    'label'
])

In [None]:
# Display the per-video table.
df

In [None]:
# Total footage per label, as a one-column frame ('Sum') indexed by label.
df_duration_sum_by_label = (
    df.groupby('label')['duration']
    .sum()
    .to_frame(name='Sum')
)

df_duration_sum_by_label

In [None]:
# Mean clip duration per label, as a one-column frame ('Average') indexed by label.
df_duration_avg_by_label = (
    df.groupby('label')['duration']
    .mean()
    .to_frame(name='Average')
)

df_duration_avg_by_label

In [None]:
# Put the per-label 'Sum' and 'Average' columns side by side and plot them as
# bars; 'Average' is drawn against a secondary y-axis.
df_video_stats = pd.concat([df_duration_sum_by_label, df_duration_avg_by_label], axis=1)
df_video_stats.plot.bar(secondary_y='Average')

In [None]:
# Flag each clip with the duration bucket it falls into. Buckets are half-open
# [low, high); note that the '> 10.0 Sec' bucket also includes exactly 10.0.
durations = df['duration']
df['> 10.0 Sec'] = durations.ge(10.0)
df['5.0 - 10.0 Sec'] = durations.between(5.0, 10.0, inclusive='left')
df['2.0 - 5.0 Sec'] = durations.between(2.0, 5.0, inclusive='left')
df['0.0 - 2.0 Sec'] = durations.between(0.0, 2.0, inclusive='left')
df

In [None]:
# Count, per label, how many clips fall into each duration bucket
# (summing a boolean flag column counts its True values).
duration_bins = [
    '0.0 - 2.0 Sec',
    '2.0 - 5.0 Sec',
    '5.0 - 10.0 Sec',
    '> 10.0 Sec',
]
df_groupby_label = df.groupby('label')
df_groupby_stats = df_groupby_label[duration_bins].sum()

df_groupby_stats

In [None]:
# Stacked bar chart: for each label, the number of videos in each duration bucket.
df_groupby_stats.plot.bar(stacked=True, ylabel='Number of Videos')

In [None]:
import random

In [None]:
# List the entries directly under the dataset root (presumably one per class).
label_dir = glob('drive/MyDrive/UCF11_updated_mpg/*')
label_dir

In [None]:
# 11 classes, each with 25 video groups.
# After shuffling the groups of a class:
#   groups  1 - 20 : training split
#   groups 21 - 25 : validation split
# One clip is sampled at random from every selected group.

SPLIT_COLUMNS = ['file_path', 'label']
N_TRAIN_GROUPS = 20
N_VALID_GROUPS = 5
SPLIT_SEED = 42  # fixed so the saved split can be regenerated exactly


def sample_group_videos(group_dirs, rng=random):
    """Pick one random video file from each group directory.

    Args:
        group_dirs: paths of group directories named like ``v_<label>_<nn>``.
        rng: object providing ``choice`` (the ``random`` module or a
            ``random.Random`` instance).

    Returns:
        A list of ``[file_path, label]`` rows, one per non-empty group. The
        label is the token between the first and second underscore of the
        directory name. Empty groups are skipped instead of raising.
    """
    rows = []
    for group_dir in group_dirs:
        label = group_dir.split('/')[-1].split('_')[1]
        # Sorted so the choice depends only on the seed, not on glob order.
        candidates = sorted(glob(group_dir + '/*'))
        if not candidates:
            continue
        rows.append([rng.choice(candidates), label])
    return rows


rng = random.Random(SPLIT_SEED)
train_rows = []
valid_rows = []

label_dirs = sorted(glob('drive/MyDrive/UCF11_updated_mpg/*'))
for label_dir in label_dirs:
    file_dirs = sorted(glob(label_dir + '/v_*'))
    rng.shuffle(file_dirs)

    # Slicing (rather than indexing 0..24) tolerates classes with < 25 groups.
    train_dirs = file_dirs[:N_TRAIN_GROUPS]
    valid_dirs = file_dirs[N_TRAIN_GROUPS:N_TRAIN_GROUPS + N_VALID_GROUPS]

    train_rows.extend(sample_group_videos(train_dirs, rng))
    # Validation clips are drawn from the validation groups themselves; the
    # original sampled from the last *training* group here, leaking training
    # footage into the validation set.
    valid_rows.extend(sample_group_videos(valid_dirs, rng))

train_df = pd.DataFrame(train_rows, columns=SPLIT_COLUMNS)
valid_df = pd.DataFrame(valid_rows, columns=SPLIT_COLUMNS)

In [None]:
# Number of rows in each split.
print(len(train_df))
print(len(valid_df))

In [None]:
# Display the training split table.
train_df

In [None]:
import os

# Create the output tree for the extracted PNG frames.
# exist_ok=True keeps the cell re-runnable: plain os.mkdir raises
# FileExistsError as soon as the directories exist from a previous run.
# Note: makedirs also creates missing parent directories.
os.makedirs('drive/MyDrive/UCF11_updated_png', exist_ok=True)
os.makedirs('drive/MyDrive/UCF11_updated_png/train', exist_ok=True)
os.makedirs('drive/MyDrive/UCF11_updated_png/valid', exist_ok=True)

In [None]:
# Persist both split tables to Drive as CSV, without the index column.
train_df.to_csv('drive/MyDrive/ucf11_train_vid.csv', index=False)
valid_df.to_csv('drive/MyDrive/ucf11_valid_vid.csv', index=False)

In [None]:
max_frame = 10
SAVE_DIR = 'drive/MyDrive/UCF11_updated_png/'

# For every training clip, save its first `max_frame` frames as 256x256 PNGs
# named train/<label>_<row index>_<frame index>.png.
for i, elem in train_df.iterrows():
    cap = cv2.VideoCapture(elem['file_path'])

    frames = []
    try:
        while len(frames) < max_frame:
            ret, frame = cap.read()
            if not ret:  # end of stream, or the file could not be decoded
                break
            frames.append(cv2.resize(frame, (256, 256)))
    finally:
        # Release the decoder as soon as reading is done, even on error.
        cap.release()

    if not frames:
        # Make unreadable clips visible instead of silently producing no output.
        print('no frames decoded, skipping:', elem['file_path'])
        continue

    label = elem['label']
    for j, frame in enumerate(frames):
        file_name = f'train/{label}_{i}_{j}.png'
        # Frames stay in OpenCV's BGR order, which is what cv2.imwrite expects.
        # imwrite reports failure through its return value, so check it.
        if not cv2.imwrite(SAVE_DIR + file_name, frame):
            raise OSError('failed to write ' + SAVE_DIR + file_name)

In [None]:
# Count the entries now present in the train output directory.
print(len(glob(SAVE_DIR + 'train/*')))

In [None]:
max_frame = 10
SAVE_DIR = 'drive/MyDrive/UCF11_updated_png/'

# For every validation clip, save its first `max_frame` frames as 256x256 PNGs
# named valid/<label>_<row index>_<frame index>.png.
for i, elem in valid_df.iterrows():
    cap = cv2.VideoCapture(elem['file_path'])

    frames = []
    try:
        while len(frames) < max_frame:
            ret, frame = cap.read()
            if not ret:  # end of stream, or the file could not be decoded
                break
            frames.append(cv2.resize(frame, (256, 256)))
    finally:
        # Release the decoder as soon as reading is done, even on error.
        cap.release()

    if not frames:
        # Make unreadable clips visible instead of silently producing no output.
        print('no frames decoded, skipping:', elem['file_path'])
        continue

    label = elem['label']
    for j, frame in enumerate(frames):
        file_name = f'valid/{label}_{i}_{j}.png'
        # Frames stay in OpenCV's BGR order, which is what cv2.imwrite expects.
        # imwrite reports failure through its return value, so check it.
        if not cv2.imwrite(SAVE_DIR + file_name, frame):
            raise OSError('failed to write ' + SAVE_DIR + file_name)

In [None]:
# Count the entries now present in the valid output directory.
print(len(glob(SAVE_DIR + 'valid/*')))