In [1]:
import os
from sklearn.model_selection import train_test_split
import shutil
import pandas as pd

In [2]:
base_dir = '/data/private/SU/bbchip13/chainsaw_classification/'
chainsaw_dir = base_dir+'wav_chainsaw_5sec/'
no_chainsaw_dir = base_dir+'/ESC-50-master/split_wav/'

result_base_dir = base_dir+'data/'

In [3]:
merge_list = []

chainsaw_filenames = [chainsaw_dir+filename for filename in os.listdir(chainsaw_dir)
                         if filename.endswith('.wav')]
chainsaw_filenames = [filename for filename in chainsaw_filenames
                         if os.path.getsize(filename) == 441078]

x_train_chainsaw, x_test_chainsaw \
    = train_test_split(chainsaw_filenames, test_size = 0.3, random_state = 7)
x_train_chainsaw, x_val_chainsaw \
    = train_test_split(x_train_chainsaw, test_size = 0.3, random_state = 7)
    
merge_list += [(os.path.basename(filename), 1, 'train') for filename in x_train_chainsaw]
merge_list += [(os.path.basename(filename), 1, 'val') for filename in x_val_chainsaw]
merge_list += [(os.path.basename(filename), 1, 'test') for filename in x_test_chainsaw]

os.makedirs(result_base_dir, exist_ok=True)
for filename in chainsaw_filenames:
    shutil.copy(filename, result_base_dir)

no_chainsaw_dirs = [no_chainsaw_dir+dirname for dirname in os.listdir(no_chainsaw_dir)
                       if os.path.isdir(no_chainsaw_dir+dirname)]
for dirname in no_chainsaw_dirs:
    no_chainsaw_filenames = [dirname+'/'+filename for filename in os.listdir(dirname)
                             if filename.endswith('.wav')]
    
    x_train_no_chainsaw, x_test_no_chainsaw \
        = train_test_split(no_chainsaw_filenames, test_size = 0.33, random_state = 7)
    x_train_no_chainsaw, x_val_no_chainsaw \
        = train_test_split(x_train_no_chainsaw, test_size = 0.33, random_state = 7)
        
    merge_list += [(os.path.basename(filename), 0, 'train') for filename in x_train_no_chainsaw]
    merge_list += [(os.path.basename(filename), 0, 'val') for filename in x_val_no_chainsaw]
    merge_list += [(os.path.basename(filename), 0, 'test') for filename in x_test_no_chainsaw]
    
    for filename in no_chainsaw_filenames:
        shutil.copy(filename, result_base_dir)

In [4]:
merge_list[:10]

[('1543_51.14.wav', 1, 'train'),
 ('593_50.04.wav', 1, 'train'),
 ('1469_4.35.wav', 1, 'train'),
 ('359_20.05.wav', 1, 'train'),
 ('396_267.27.wav', 1, 'train'),
 ('1355_498.5.wav', 1, 'train'),
 ('219_11.17.wav', 1, 'train'),
 ('777_54.17.wav', 1, 'train'),
 ('827_10.9.wav', 1, 'train'),
 ('806_10.42.wav', 1, 'train')]

In [5]:
df = pd.DataFrame({
                    'filename':[filename for filename, _, _ in merge_list],
                    'label':[label for _, label, _ in merge_list],
                    'type':[data_type for _, _, data_type in merge_list]
                  })
df = df.sample(frac=1).reset_index(drop=True)
df.head

<bound method NDFrame.head of                filename  label   type
0       1247_387.72.wav      1  train
1     4-161127-A-10.wav      0  train
2          827_10.9.wav      1  train
3       1332_107.15.wav      1   test
4     4-163609-B-16.wav      0    val
5     3-160119-A-15.wav      0   test
6     4-161105-A-47.wav      0  train
7     3-177082-A-22.wav      0  train
8         237_20.03.wav      1  train
9          190_7.23.wav      1    val
10        571_54.48.wav      1   test
11        361_199.6.wav      1    val
12      1211_140.47.wav      1  train
13    3-103401-D-33.wav      0   test
14        532_21.04.wav      1  train
15         309_0.78.wav      1  train
16        448_53.25.wav      1    val
17        435_45.64.wav      1    val
18    1-101296-B-19.wav      0    val
19      1165_104.69.wav      1    val
20       783_104.47.wav      1   test
21         49_10.19.wav      1   test
22      1198_241.64.wav      1  train
23    2-106486-A-44.wav      0  train
24    4-191297-A-28.

In [6]:
df.to_csv('data_annotations.csv', index=False)

In [7]:
df = pd.read_csv('data_annotations.csv')
for idx, filename, label, data_type in df.itertuples():
    result_dir = result_base_dir+data_type+'/'
    os.makedirs(result_dir, exist_ok=True)
    shutil.move(result_base_dir+filename, result_dir)