In [1]:
import numpy as np
from tqdm import tqdm
import os
import re
import natsort

In [3]:
folder_dir = 'extracted_frames'
target_dir = 'dataset'
# actions = np.array([])
video_labels = {'0': [], '1': [], '2': [], '3': [], '4': [], '5': []}

In [4]:
file_list = []
for roots, dirs, files in os.walk(folder_dir):
    for filename in files:
        # file_list.append(os.path.join(folder_dir, filename))
        file_list.append(filename)
print(f"Total files: {len(file_list)}")

Total files: 24841


In [5]:
# sort filename numerically
file_list = natsort.natsorted(file_list,reverse=False)

In [6]:
file_list[:10]

['video_0_keypoints_0.npy',
 'video_0_keypoints_30.npy',
 'video_0_keypoints_60.npy',
 'video_0_keypoints_90.npy',
 'video_0_keypoints_120.npy',
 'video_0_keypoints_150.npy',
 'video_0_keypoints_180.npy',
 'video_0_keypoints_210.npy',
 'video_0_keypoints_240.npy',
 'video_0_keypoints_270.npy']

In [7]:
# Check each class
for filename in tqdm(file_list):
    file_label = filename.split('_')[1]
    video_labels[file_label].append(filename)

100%|██████████| 24841/24841 [00:00<00:00, 2043997.05it/s]


In [8]:
# Show number of each class
print('Number of frame in class 0: ', len(video_labels['0']))
print('Number of frame in class 1: ', len(video_labels['1']))
print('Number of frame in class 2: ', len(video_labels['2']))
print('Number of frame in class 3: ', len(video_labels['3']))
print('Number of frame in class 4: ', len(video_labels['4']))
print('Number of frame in class 5: ', len(video_labels['5']))
# print('Total number of cheating actions: ', sum())

Number of frame in class 0:  11839
Number of frame in class 1:  3007
Number of frame in class 2:  7160
Number of frame in class 3:  1744
Number of frame in class 4:  239
Number of frame in class 5:  852


### Note
- 0 : not cheating
- 1 : cheating from book
- 2 : talking
- 3 : using internet
- 4 : using phone
- 5 : ask friends over phone

- combine 4 + 1
- combine 5 + 2
- remove 3

new_labels consists of:
- 0 : not cheating
- 1 : book + phone
- 2 : talking all (overphone)

In [9]:
# Re process data
new_labels = {}
new_labels['0'] = video_labels['0']
new_labels['1'] = video_labels['1'] + video_labels['4']
new_labels['2'] = video_labels['2'] + video_labels['5']

# Check
print('Number of frame in class 0: ', len(new_labels['0']))
print('Number of frame in class 1: ', len(new_labels['1']))
print('Number of frame in class 2: ', len(new_labels['2']))

Number of frame in class 0:  11839
Number of frame in class 1:  3246
Number of frame in class 2:  8012


In [10]:
# Slice number of frame to be 3000 in total
MAXIMUM_LENGTH = 1740
new_labels['0'] = new_labels['0'][:3000]
new_labels['1'] = new_labels['1'][:3000]
new_labels['2'] = new_labels['2'][:3000]

# Check
print('Number of frame in class 0: ', len(new_labels['0']))
print('Number of frame in class 1: ', len(new_labels['1']))
print('Number of frame in class 2: ', len(new_labels['2']))

Number of frame in class 0:  3000
Number of frame in class 1:  3000
Number of frame in class 2:  3000


In [12]:
# Grouping sequence
# One sequence consists of 30 frames
SEQUENCE_LENGTH = 30
sequences = []
labels = []
window = []

for key in new_labels:
    videos = new_labels[key]
    sequence = []
    for video in videos:
        frame = np.load(os.path.join(folder_dir, video))
        sequence.append(frame)
        
        if len(sequence) == 30:
            sequences.append(sequence)
            labels.append(key)
            sequence = []
        
        


In [13]:
sequences = np.array(sequences)
print(sequences.shape)
labels = np.array(labels)
print(labels.shape)

(300, 30, 1404)
(300,)


In [14]:
print('(Total Sequences, Sequence Length, Number of Feature)')

(Total Sequences, Sequence Length, Number of Feature)


In [15]:
labels

array(['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1

In [16]:
# Save output
np.save(os.path.join(target_dir, 'data_x.npy'), sequences)
np.save(os.path.join(target_dir, 'dataset_y.npy'), labels)