In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
import pickle

from typing import List, Tuple, Dict
import ffmpeg as ff


import warnings
warnings.filterwarnings(action='ignore') 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the ground-truth tables for both splits; both CSVs are decoded as EUC-KR.
train_gt, test_gt = (
    pd.read_csv(csv_path, encoding='EUC-KR')
    for csv_path in ('./dataset/train.csv', './dataset/test.csv')
)

In [3]:
# Display the training ground-truth table as this cell's output.
train_gt

Unnamed: 0,path,label
0,단어_106_가렵다_정면_1_이숙기.MOV,0
1,단어_108_가슴_정면_1_이숙기.MOV,1
2,단어_113_감전_정면_1_이숙기.MOV,2
3,단어_120_개_정면_1_이숙기.MOV,3
4,단어_121_거실_정면_1_이숙기.MOV,4
...,...,...
75,단어_127_계곡_정면_8.MOV,5
76,단어_152_기절_정면_8.MOV,6
77,단어_180_도둑_정면_8.MOV,7
78,단어_189_동전_정면_8.MOV,8


In [4]:
# Display the test ground-truth table as this cell's output.
test_gt

Unnamed: 0,path,label
0,단어_106_가렵다_정면_9.MOV,0
1,단어_108_가슴_정면_9.MOV,1
2,단어_113_감전_정면_9.MOV,2
3,단어_120_개_정면_9.MOV,3
4,단어_121_거실_정면_9.MOV,4
5,단어_127_계곡_정면_9.MOV,5
6,단어_152_기절_정면_9.MOV,6
7,단어_180_도둑_정면_9.MOV,7
8,단어_189_동전_정면_9.MOV,8
9,단어_276_아빠_정면_9.MOV,9


## 각 영상마다 frame, width, height 확인

In [5]:
def check_preprocessing_input(file_path: str, file_name: str, dictionary: Dict[str, str], training: bool = True) -> None:
    """Print the frame count and coded width/height of one video file.

    Args:
        file_path: Path of the video file to probe.
        file_name: Not used by this function; kept so existing calls still work.
        dictionary: Not used by this function; kept so existing calls still work.
        training: Not used by this function; kept so existing calls still work.

    Returns:
        None. The values are only printed.

    Raises:
        IndexError: If the probe result lists no video stream.
        KeyError: If the first video stream lacks one of the printed fields.
    """
    # Probe the file named by the argument. The earlier code read a global
    # called ``filePath`` here, so the ``file_path`` parameter was ignored and
    # the function broke whenever the caller's variable had a different name.
    probe = ff.probe(file_path)
    video_streams = [stream for stream in probe["streams"] if stream["codec_type"] == "video"]
    first_stream = video_streams[0]
    print('frame = ', first_stream['nb_frames'], 'width = ', first_stream['coded_width'], ', height = ', first_stream['coded_height'])

In [6]:
# Print the frame count and resolution of every file in the training folder.
path = './dataset/TRAIN'
for filename in os.listdir(path):
    filePath = '/'.join((path, filename))
    print(filePath)
    check_preprocessing_input(filePath, filename, train_gt)
    

./dataset/TRAIN/단어_106_가렵다_정면_1_이숙기.MOV
frame =  190 width =  1920 , height =  1088
./dataset/TRAIN/단어_113_감전_정면_8.MOV
frame =  332 width =  1920 , height =  1088
./dataset/TRAIN/단어_108_가슴_정면_6.MOV
frame =  205 width =  1920 , height =  1088
./dataset/TRAIN/단어_180_도둑_정면_1_이숙기.MOV
frame =  180 width =  1920 , height =  1088
./dataset/TRAIN/단어_108_가슴_정면_1_이숙기.MOV
frame =  177 width =  1920 , height =  1088
./dataset/TRAIN/단어_180_도둑_정면_8.MOV
frame =  329 width =  1920 , height =  1088
./dataset/TRAIN/단어_127_계곡_정면_1_이숙기.MOV
frame =  184 width =  1920 , height =  1088
./dataset/TRAIN/단어_152_기절_정면_5.MOV
frame =  179 width =  1920 , height =  1088
./dataset/TRAIN/단어_120_개_정면_4.MOV
frame =  172 width =  1920 , height =  1088
./dataset/TRAIN/단어_108_가슴_정면_3.MOV
frame =  343 width =  1920 , height =  1088
./dataset/TRAIN/단어_113_감전_정면_6.MOV
frame =  228 width =  1920 , height =  1088
./dataset/TRAIN/단어_152_기절_정면_4.MOV
frame =  183 width =  1920 , height =  1088
./dataset/TRAIN/단어_127_계곡_정면_6.MOV
f

# 추출 전략

##### 모든 영상에서 각각 30프레임씩 추출
 - 전체 프레임 수를 30으로 나눈 간격(frame/30)만큼 이동하면서 프레임 추출
 - width, height는 (224,224)로 resize

In [48]:
def get_number_of_frames(file_path: str) -> int:
    """Return the frame count reported for the first video stream of a file.

    Args:
        file_path: Path of the video file to probe.

    Returns:
        The ``nb_frames`` entry of the first video stream, converted to ``int``.

    Raises:
        IndexError: If the probe result lists no video stream.
        KeyError: If the first video stream has no ``nb_frames`` entry.
    """
    # Use the argument, not the notebook-global ``filePath`` the earlier code
    # read; otherwise the function silently probes whatever file the global
    # currently points at.
    probe = ff.probe(file_path)
    video_streams = [stream for stream in probe["streams"] if stream["codec_type"] == "video"]
    # Convert here so the return value matches the ``int`` annotation. Callers
    # that already wrap the result in ``int(...)`` are unaffected.
    return int(video_streams[0]['nb_frames'])

def extract_N_video_frames(file_path: str, number_of_samples: int = 6) -> List[np.ndarray]:
    """Sample 30 evenly spaced RGB frames from a video file.

    The video's frame count is divided by 30 to get a stride, and the frame at
    ``int(k * stride)`` is read for ``k = 0 .. 29``.

    Args:
        file_path: Path of the video file to read.
        number_of_samples: Currently NOT used. The function always extracts 30
            frames; the parameter is kept so existing calls keep working.
            NOTE(review): decide whether this should replace the fixed 30.

    Returns:
        A list of 30 frames converted from BGR to RGB.

    Raises:
        RuntimeError: If the video cannot be opened or a frame cannot be read.
    """
    # Use the argument rather than the notebook-global ``filePath``.
    nb_frames = int(get_number_of_frames(file_path=file_path))

    div = 30
    div_frames = nb_frames / div
    print('div_frames = ', div_frames)
    print('nb_frames = ', nb_frames)
    print('div = ', div)
    video_frames = []

    cap = cv2.VideoCapture(file_path)
    try:
        if not cap.isOpened():
            raise RuntimeError(f'could not open video: {file_path}')
        for step in range(div):
            # Multiply by the stride. The earlier code ADDED the stride to the
            # loop index, which read 30 consecutive frames starting at
            # ``int(div_frames)`` instead of spreading them over the clip.
            frame_index = int(step * div_frames)
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
            res, frame = cap.read()
            if not res:
                # Fail with a clear message instead of passing None to cvtColor.
                raise RuntimeError(f'could not read frame {frame_index} of {file_path}')
            video_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture even when a read fails.
        cap.release()
    print('추출한 frame = ', len(video_frames))
    return video_frames

def resize_image(image: np.ndarray, new_size: Tuple[int,int]) -> np.ndarray:
    """Resize ``image`` with OpenCV using area interpolation.

    Args:
        image: Image array to resize.
        new_size: Target size, forwarded unchanged as the second argument of
            ``cv2.resize``.

    Returns:
        The resized image array.
    """
    resized = cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)
    return resized

def preprocessing_input(file_path: str, file_name: str, gt = train_gt, training: bool = True) -> Tuple[np.ndarray, np.ndarray]:
    """Turn one video into a stacked frame array plus its label.

    Frames are sampled with ``extract_N_video_frames``, each is resized to
    (224, 224), and the results are stacked along a new first axis.

    Args:
        file_path: Path of the video file to read.
        file_name: File name used to find the label in ``gt['path']``.
        gt: Table with ``path`` and ``label`` columns. The default is bound to
            ``train_gt`` when this function is defined.
        training: Not used by this function; kept so existing calls still work.

    Returns:
        ``(preprocessed_video, video_gt)``: the stacked frames and the label of
        the matching row. If several rows match, the last one is used.

    Raises:
        KeyError: If no row of ``gt`` has ``path`` equal to ``file_name``.
    """
    # Use the argument rather than the notebook-global ``filePath``.
    sampled = extract_N_video_frames(file_path=file_path, number_of_samples=6)
    resized_images = [resize_image(image=im, new_size=(224, 224)) for im in sampled]
    preprocessed_video = np.stack(resized_images)

    # Boolean-mask lookup works for any index. The earlier ``gt.loc[i, ...]``
    # loop assumed a 0..n-1 index.
    matches = gt.loc[gt['path'] == file_name, 'label']
    if matches.empty:
        # The earlier code reached ``return`` with ``video_gt`` unbound here.
        raise KeyError(f'no label found for {file_name!r} in the ground-truth table')
    video_gt = matches.iloc[-1]
    return (preprocessed_video, video_gt)

In [49]:
# Preprocess every file in the training folder into a (frames, label) pair.
training_set_data = []

path = './dataset/TRAIN'
i = 0
for filename in os.listdir(path):
    filePath = '/'.join((path, filename))
    training_set_data.append(
        preprocessing_input(filePath, filename, train_gt, True)
    )
    print(i)  # zero-based index of the video just processed
    i = len(training_set_data)

div_frames =  6.333333333333333
nb_frames =  190
div =  30
추출한 frame =  30
0
div_frames =  11.066666666666666
nb_frames =  332
div =  30
추출한 frame =  30
1
div_frames =  6.833333333333333
nb_frames =  205
div =  30
추출한 frame =  30
2
div_frames =  6.0
nb_frames =  180
div =  30
추출한 frame =  30
3
div_frames =  5.9
nb_frames =  177
div =  30
추출한 frame =  30
4
div_frames =  10.966666666666667
nb_frames =  329
div =  30
추출한 frame =  30
5
div_frames =  6.133333333333334
nb_frames =  184
div =  30
추출한 frame =  30
6
div_frames =  5.966666666666667
nb_frames =  179
div =  30
추출한 frame =  30
7
div_frames =  5.733333333333333
nb_frames =  172
div =  30
추출한 frame =  30
8
div_frames =  11.433333333333334
nb_frames =  343
div =  30
추출한 frame =  30
9
div_frames =  7.6
nb_frames =  228
div =  30
추출한 frame =  30
10
div_frames =  6.1
nb_frames =  183
div =  30
추출한 frame =  30
11
div_frames =  7.866666666666666
nb_frames =  236
div =  30
추출한 frame =  30
12
div_frames =  7.366666666666666
nb_frames =  221


In [50]:
def reshape_to_expected_input(dataset: List[Tuple[np.ndarray, np.ndarray]]) -> Tuple[np.ndarray,np.ndarray]:
    """Split a list of (input, label) pairs into two stacked arrays.

    Args:
        dataset: Sequence of pairs; element 0 of each pair is the input and
            element 1 is its label.

    Returns:
        ``(inputs, labels)``, each built with ``np.stack`` over the pairs in
        their original order.
    """
    inputs = [sample[0] for sample in dataset]
    labels = [sample[1] for sample in dataset]
    return (np.stack(inputs), np.stack(labels))

In [54]:
# Stack the per-video samples into one (inputs, labels) pair of arrays, then
# delete the source list. Because of the `del`, re-running only this cell
# raises NameError until the list is rebuilt.
train_input = reshape_to_expected_input(dataset= training_set_data)
del training_set_data

In [55]:
# Pickle the stacked training arrays to a file in the current working directory.
savename = 'training_set.dat'
with open(savename, "wb") as f:
    pickle.dump(train_input, f)

In [56]:
# Preprocess every file in the test folder into a (frames, label) pair.
test_set_data = []

path = './dataset/TEST'
i = 0
for filename in os.listdir(path):
    filePath = '/'.join((path, filename))
    test_set_data.append(
        preprocessing_input(filePath, filename, test_gt, False)
    )
    print(i)  # zero-based index of the video just processed
    i = len(test_set_data)

div_frames =  8.0
nb_frames =  240
div =  30
추출한 frame =  30
0
div_frames =  5.033333333333333
nb_frames =  151
div =  30
추출한 frame =  30
1
div_frames =  6.433333333333334
nb_frames =  193
div =  30
추출한 frame =  30
2
div_frames =  5.266666666666667
nb_frames =  158
div =  30
추출한 frame =  30
3
div_frames =  5.8
nb_frames =  174
div =  30
추출한 frame =  30
4
div_frames =  5.266666666666667
nb_frames =  158
div =  30
추출한 frame =  30
5
div_frames =  5.833333333333333
nb_frames =  175
div =  30
추출한 frame =  30
6
div_frames =  7.533333333333333
nb_frames =  226
div =  30
추출한 frame =  30
7
div_frames =  7.066666666666666
nb_frames =  212
div =  30
추출한 frame =  30
8
div_frames =  5.033333333333333
nb_frames =  151
div =  30
추출한 frame =  30
9
div_frames =  6.833333333333333
nb_frames =  205
div =  30
추출한 frame =  30
10
div_frames =  6.4
nb_frames =  192
div =  30
추출한 frame =  30
11
div_frames =  6.533333333333333
nb_frames =  196
div =  30
추출한 frame =  30
12
div_frames =  7.333333333333333
nb_fra

In [57]:
# Stack the per-video test samples into one (inputs, labels) pair of arrays,
# then delete the source list. Because of the `del`, re-running only this cell
# raises NameError until the list is rebuilt.
test_input = reshape_to_expected_input(dataset= test_set_data)
del test_set_data

In [58]:
# Pickle the stacked test arrays to a file in the current working directory.
savename = 'test_set.dat'
with open(savename, "wb") as f:
    pickle.dump(test_input, f)

# csv 파일 인코딩 확인

In [None]:
import chardet

# Guess the encoding of the training CSV from its first 10,000 bytes.
# The path is the same relative location the notebook reads the CSV from,
# replacing a machine-specific absolute home-directory path so the check
# inspects the file that is actually loaded and works on other machines.
with open('./dataset/train.csv', 'rb') as rawdata:
    result = chardet.detect(rawdata.read(10000))

In [None]:
# Print the value returned by chardet.detect for the training CSV.
print(result)